SLIDE 13 http://comsys.rwth-aachen.de
25 Performance- & Analysis-Feedback Prediction Fix bugs Analysis
Instruction Chains
Per Function Per Platform Performance Prediction Network Function
1 #include <bcc/proto.h> 2 #include <linux/pkt_cls.h> 3 #include <net/sock.h> 4 BPF_TABLE("hash", uint16_t, uint8_t, blocked_dports, 4096); 5 6 int act_main(struct __sk_buff *skb) { 7 u8 *cursor = 0; 8 struct ethernet_t *ethernet; 9 struct ip_t *ip; 10 struct tcp_t *tcp; 11 if (skb->len < sizeof(*ethernet)+sizeof(*ip)+sizeof(*tcp)) return TC_ACT_UNSPEC; 12 13 ethernet = cursor_advance(cursor, sizeof(*ethernet)); 14 if (!(ethernet->type == 0x0800)) return TC_ACT_UNSPEC; 15 ip = cursor_advance(cursor, sizeof(*ip)); 16 if (ip->nextp != 0x06) return TC_ACT_UNSPEC; 17 18 tcp = cursor_advance(cursor, sizeof(*tcp)); 19 uint16_t dport = tcp->dst_port; 20 uint8_t *blocked_p = blocked_dports.lookup(&dport); 21 if (!blocked_p) return TC_ACT_OK; 22 uint8_t blocked = *blocked_p; 23 if (blocked) return TC_ACT_PIPE; 24 else return TC_ACT_OK; 25 }
Execution Tree Instruction Chains
1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 9 bpf_load_byte 10 icmp 11 br 12 bpf_load_half 13 trunc 14 store 15 bpf_pseudo 16 bpf_map_lookup 17 hash(2, 1) 18 icmp 19 br 20 load 21 icmp 22 br
Performance Prediction
. . .
Offline Calibration
Measure instruction costs
CPU cycles needed for an add instruction on our hardware (i7-870)
Per Platform Instruction Cost Databases
Cisco Juniper AWS Carrier Cloud Our HW Linux Intel i7
Symbolic Execution 1 Fix Bugs 2 Iterate Over All Paths 3 Store 4 Predict 5 6 Increase Packet Rate, Reduce Latency, Harden Against Attacks Per Function Per Platform Performance Prediction Network Function
1 #include <bcc/proto.h> 2 #include <linux/pkt_cls.h> 3 #include <net/sock.h> 4 BPF_TABLE("hash", uint16_t, uint8_t, blocked_dports, 4096); 5 6 int act_main(struct __sk_buff *skb) { 7 u8 *cursor = 0; 8 struct ethernet_t *ethernet; 9 struct ip_t *ip; 10 struct tcp_t *tcp; 11 if (skb->len < sizeof(*ethernet)+sizeof(*ip)+sizeof(*tcp)) return TC_ACT_UNSPEC; 12 13 ethernet = cursor_advance(cursor, sizeof(*ethernet)); 14 if (!(ethernet->type == 0x0800)) return TC_ACT_UNSPEC; 15 ip = cursor_advance(cursor, sizeof(*ip)); 16 if (ip->nextp != 0x06) return TC_ACT_UNSPEC; 17 18 tcp = cursor_advance(cursor, sizeof(*tcp)); 19 uint16_t dport = tcp->dst_port; 20 uint8_t *blocked_p = blocked_dports.lookup(&dport); 21 if (!blocked_p) return TC_ACT_OK; 22 uint8_t blocked = *blocked_p; 23 if (blocked) return TC_ACT_PIPE; 24 else return TC_ACT_OK; 25 }
Execution Tree Instruction Chains
1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 9 bpf_load_byte 10 icmp 11 br 12 bpf_load_half 13 trunc 14 store 15 bpf_pseudo 16 bpf_map_lookup 17 hash(2, 1) 18 icmp 19 br 20 load 21 icmp 22 br
·
Performance Prediction
. . .
Offline Calibration
Measure instruction costs
CPU cycles needed for an add instruction on our hardware (i7-870)
Per Platform Instruction Cost Databases
Cisco Juniper AWS Carrier Cloud Our HW Linux Intel i7
Symbolic Execution 1 Fix Bugs 2 Iterate Over All Paths 3 Store 4 Predict 5 6 Increase Packet Rate, Reduce Latency, Harden Against Attacks
...
Symbolic Analysis
Per Function Per Platform Performance Prediction Network Function
1 #include <bcc/proto.h> 2 #include <linux/pkt_cls.h> 3 #include <net/sock.h> 4 BPF_TABLE("hash", uint16_t, uint8_t, blocked_dports, 4096); 5 6 int act_main(struct __sk_buff *skb) { 7 u8 *cursor = 0; 8 struct ethernet_t *ethernet; 9 struct ip_t *ip; 10 struct tcp_t *tcp; 11 if (skb->len < sizeof(*ethernet)+sizeof(*ip)+sizeof(*tcp)) return TC_ACT_UNSPEC; 12 13 ethernet = cursor_advance(cursor, sizeof(*ethernet)); 14 if (!(ethernet->type == 0x0800)) return TC_ACT_UNSPEC; 15 ip = cursor_advance(cursor, sizeof(*ip)); 16 if (ip->nextp != 0x06) return TC_ACT_UNSPEC; 17 18 tcp = cursor_advance(cursor, sizeof(*tcp)); 19 uint16_t dport = tcp->dst_port; 20 uint8_t *blocked_p = blocked_dports.lookup(&dport); 21 if (!blocked_p) return TC_ACT_OK; 22 uint8_t blocked = *blocked_p; 23 if (blocked) return TC_ACT_PIPE; 24 else return TC_ACT_OK; 25 }
Execution Tree
if (skb->len < sizeof(*ethernet)+sizeof(*ip)+sizeof(*tcp)) {} return TC_ACT_UNSPEC {len < 54} if (!(ethernet->type == 0x0800)) {len ≥ 54} return TC_ACT_UNSPEC {len ≥ 54, read (data + 12) ≠ 2048} uint16_t dport = tcp->dst_port; {len ≥ 54, read (data + 12) = 2048} uint8_t *blocked_p = blocked_dports.lookup(&dport); {len ≥ 54, read (data + 12) = 2048} if (!blocked_p) {len ≥ 54, read (data + 12) = 2048} return TC_ACT_OK {len ≥ 54, read (data + 12) = 2048, λ = 0} uint8_t blocked = *blocked_p; {len ≥ 54, read (data + 12) = 2048, λ ≠ 0} if (blocked) {len ≥ 54, read (data + 12) = 2048, λ ≠ 0} return TC_ACT_PIPE {len ≥ 54, read (data + 12) = 2048, λ ≠ 0, read (λ) ≠ 0} return TC_ACT_OK {len ≥ 54, read (data + 12) = 2048, λ ≠ 0, read (λ) = 0}
Instruction Chains
1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 9 bpf_load_byte 10 icmp 11 br 12 bpf_load_half 13 trunc 14 store 15 bpf_pseudo 16 bpf_map_lookup 17 hash(2, 1) 18 icmp 19 br 20 load 21 icmp 22 br
Performance Prediction
. . .
Offline Calibration
Measure instruction costs
CPU cycles needed for an add instruction on our hardware (i7-870)
Per Platform Instruction Cost Databases
Cisco Juniper AWS Carrier Cloud Our HW Linux Intel i7
Symbolic Execution 1 Fix Bugs 2 Iterate Over All Paths 3 Store 4 Predict 5 6 Increase Packet Rate, Reduce Latency, Harden Against Attacks
Execution Tree
Per Function Per Platform Performance Prediction Network Function
1 #include <bcc/proto.h> 2 #include <linux/pkt_cls.h> 3 #include <net/sock.h> 4 BPF_TABLE("hash", uint16_t, uint8_t, blocked_dports, 4096); 5 6 int act_main(struct __sk_buff *skb) { 7 u8 *cursor = 0; 8 struct ethernet_t *ethernet; 9 struct ip_t *ip; 10 struct tcp_t *tcp; 11 if (skb->len < sizeof(*ethernet)+sizeof(*ip)+sizeof(*tcp)) return TC_ACT_UNSPEC; 12 13 ethernet = cursor_advance(cursor, sizeof(*ethernet)); 14 if (!(ethernet->type == 0x0800)) return TC_ACT_UNSPEC; 15 ip = cursor_advance(cursor, sizeof(*ip)); 16 if (ip->nextp != 0x06) return TC_ACT_UNSPEC; 17 18 tcp = cursor_advance(cursor, sizeof(*tcp)); 19 uint16_t dport = tcp->dst_port; 20 uint8_t *blocked_p = blocked_dports.lookup(&dport); 21 if (!blocked_p) return TC_ACT_OK; 22 uint8_t blocked = *blocked_p; 23 if (blocked) return TC_ACT_PIPE; 24 else return TC_ACT_OK; 25 }
Execution Tree Instruction Chains
1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 1 alloca 2 getelementptr 3 load 4 icmp 5 br 6 bpf_load_half 7 icmp 8 br 9 bpf_load_byte 10 icmp 11 br 12 bpf_load_half 13 trunc 14 store 15 bpf_pseudo 16 bpf_map_lookup 17 hash(2, 1) 18 icmp 19 br 20 load 21 icmp 22 br
Performance Prediction
. . .
Offline Calibration
Measure instruction costs
CPU cycles needed for an add instruction on our hardware (i7-870)
Per Platform Instruction Cost Databases
Cisco Juniper AWS Carrier Cloud Our HW Linux Intel i7
Symbolic Execution 1 Fix Bugs 2 Iterate Over All Paths 3 Store 4 Predict 5 6 Increase Packet Rate, Reduce Latency, Harden Against Attacks
Network Function Code
Instruction- Cache- & CPU-Model Traffic Pattern
100 200 300 400 CPU Cycles 0.00 0.01 0.02 Frequency 0.00 0.25 0.50 0.75 1.00 CDF measured predicted 5 5 Rate [Million pkt/s] 250 500 750 1000 1250 CPU Cycles 0.000 0.001 0.002 0.003 Frequency 0.00 0.25 0.50 0.75 1.00 CDF measured predicted 5 5 4 3 2 Rate [Million pkt/s]
Performance Predictions
100 200 300 CPU Cycles 0.00 0.05 0.10 Frequency 0.00 0.25 0.50 0.75 1.00 CDF measured predicted 100 200 300 400 CPU Cycles 0.00 0.02 0.04 Frequency 0.00 0.25 0.50 0.75 1.00 CDF measured predicted 100 200 300 400 CPU Cycles 0.00 0.05 0.10 Frequency 0.00 0.25 0.50 0.75 1.00 CDF measured predicted
Pre-Deployment Performance Prediction of On-Path NFs
http://comsys.rwth-aachen.de
26
Challenges in Softwarized Communication Systems
Trend: Software plays an increasingly important role in networking
Protocols, billions of apps, etc. Network elements become flexible (SDN, NFV, In-network processing)
Important: Analysis of real code – not models
Switch Switch Switch Data Center Data Center
Networked Systems (protocols, apps)
Sensors Actuators
Switch Switch Switch Cloud- based Control
wired network
Latency-critical networked control
Networked Systems
Edge: Protocols, Apps, ... Core: Network Functions
In-Network Processing
Reducing latency Networked control
Predictable?
(performance, resources)
Reliability?
(bugs, loops)
Reliability!
(bugs, loops)
Predictable!
(performance, resources)