Checked C
UM
Michael Hicks The University of Maryland joint work with David Tarditi (MSR), Andrew Ruef (UMD), Sam Elliott (UW)
Checked C Michael Hicks The University of Maryland joint work with - - PowerPoint PPT Presentation
Checked C Michael Hicks The University of Maryland joint work with David Tarditi (MSR), Andrew Ruef (UMD), Sam Elliott (UW) UM Motivation - Lots of C/C++ code out there. - One open source code indexer (openhub.net) found 8.5 billion lines of C
Michael Hicks The University of Maryland joint work with David Tarditi (MSR), Andrew Ruef (UMD), Sam Elliott (UW)
billion lines of C code.
level mitigations
2
Checked C is another take at making system software written in C more reliable and secure. Approach:
conversion of code to be type-safe.
3
https://github.com/Microsoft/checkedc https://github.com/Microsoft/checkedc-clang
4
Unlike Cyclone Like Cyclone
5
Checked C's
LLVM IR Generator Analyses
6
T* val = malloc(sizeof(T)); T* vals = calloc(n, sizeof(T)); T vals[N] = { ... }; ptr<T> val = malloc(sizeof(T));
Singleton Pointer Array Pointer
array_ptr<T> vals = calloc(n, sizeof(T)); T vals checked[N] = { ... };
7
Declaration Invariant
array_ptr<T> p : bounds(l, u) l ≤ p < u array_ptr<T> p : count(n) p ≤ p < p+n array_ptr<T> p : byte_count(n) p ≤ p < (char*)p + n
Expressions in bounds(l, u) must be non-modifying
8
char* str = calloc(n, sizeof(char)); char str[N] = { ... };
NT Array Pointer
nt_array_ptr<char> str : count(n-1) = calloc(n, sizeof(char)); char str checked[N+1] = { …, '\0' };
nt_array_ptr<T> p : bounds(l, u) l ≤ p ≤ u && ∃c ≥ u. *c == '\0' — can read *u
9
size_t my_strlcpy( nt_array_ptr<char> dst: count(dst_sz - 1), nt_array_ptr<char> src, size_t dst_sz) { size_t i = 0; nt_array_ptr<char> s : count(i) = src; while (s[i] != '\0' && i < dst_sz - 1) { //bounds on s may expand by 1 dst[i] = s[i]; ++i; } dst[i] = '\0'; //ok to write to upper bound return i; }
10
int i = *p; *p = 0; *p += 1; (*p)++; Assignment Compound Assignment Increment/Decrement p[n] p->field Pointer Dereference Elided if the compiler can prove the access is safe
11
bool echo( int16_t user_length, size_t user_payload_len, char *user_payload, resp_t *resp) { char *resp_data = malloc(user_length); resp->payload_buf = resp_data; resp->payload_buf_len = user_length; // memcpy(resp->payload_buf, user_payload_buf, user_length) for (int i = 0; i < user_length; i++) { resp->payload_buf[i] = user_payload_buf[i]; } return true; }
user_length is
provided by user
user_payload_len is
from the parser Copy data from user_payload into new buffer in
resp object
typedef struct { size_t payload_len; char *payload; // ... } resp_t;
Example inspired by Heartbleed error
12
bool echo( int16_t user_length, size_t user_payload_len, char *user_payload, resp_t *resp) { char *resp_data = malloc(user_length); resp->payload = resp_data; resp->payload_len = user_length; // memcpy(resp->payload_buf, user_payload_buf, user_length) for (int i = 0; i < user_length; i++) { resp->payload_buf[i] = user_payload_buf[i]; } return true; }
malloc could fail
Copy data from user_payload into new buffer in
resp object user_length is
provided by user
user_payload_len is
from the parser
typedef struct { size_t payload_len; char *payload; // ... } resp_t;
13
bool echo( int16_t user_length, size_t user_payload_len, char *user_payload, resp_t *resp) { char *resp_data = malloc(user_length); resp->payload = resp_data; resp->payload_len = user_length; // memcpy(resp->payload, user_payload, user_length) for (size_t i = 0; i < user_length; i++) { resp->payload[i] = user_payload[i]; } return true; }
Copy data from user_payload into new buffer in
resp object user_length is
provided by user
typedef struct { size_t payload_len; char *payload; // ... } resp_t;
user_payload_len is
from the parser
14
bool echo( int16_t user_length, size_t user_payload_len, array_ptr<char> user_payload, ptr<resp_t> resp) { array_ptr<char> resp_data = malloc(user_length); resp->payload = resp_data; resp->payload_len = user_length; // memcpy(resp->payload, user_payload, user_length) for (size_t i = 0; i < user_length; i++) { resp->payload[i] = user_payload[i]; } return true; }
Step 1: Manually Convert to Checked Types
typedef struct { size_t payload_len; array_ptr<char> payload; // ... } resp_t;
15
bool echo( int16_t user_length, size_t user_payload_len, array_ptr<char> user_payload : count(user_payload_len), ptr<resp_t> resp) { array_ptr<char> resp_data : count(user_length) = malloc(user_length); resp->payload = resp_data; resp->payload_len = user_length; // memcpy(resp->payload, user_payload, user_length) for (size_t i = 0; i < user_length; i++) { resp->payload[i] = user_payload[i]; } return true; }
Step 2: Manually Add Bounds Declarations
typedef struct { size_t payload_len; array_ptr<char> payload : count(payload_len); // ... } resp_t;
bool echo( int16_t user_length, size_t user_payload_len, array_ptr<char> user_payload : count(user_payload_len), ptr<resp_t> resp) { array_ptr<char> resp_data : count(user_length) = malloc(user_length); dynamic_check(resp != NULL); resp->payload = resp_data; resp->payload_len = user_length; // memcpy(resp->payload, user_payload, user_length) for (size_t i = 0; i < user_length; i++) { dynamic_check(user_payload != NULL); dynamic_check(user_payload <= &user_payload[i]); dynamic_check(&user_payload[i] < user_payload + user_payload_len); dynamic_check(resp->payload != NULL); dynamic_check(resp->payload <= &resp->payload[i]); dynamic_check(&resp->payload[i] < resp->payload + resp->payload_len resp->payload[i] = user_payload[i]; } return true; }
No Memory Disclosure
malloc now checked
Step 3: Compiler Inserts Checks Automatically
16
bool echo( int16_t user_length, size_t user_payload_len, array_ptr<char> user_payload : count(user_payload_len), ptr<resp_t> resp) { array_ptr<char> resp_data : count(user_length) = malloc(user_length); dynamic_check(resp != NULL); resp->payload = resp_data; resp->payload_len = user_length; // memcpy(resp->payload, user_payload, user_length) for (size_t i = 0; i < user_length; i++) { dynamic_check(user_payload != NULL); dynamic_check(user_payload <= &user_payload[i]); dynamic_check(&user_payload[i] < user_payload + user_payload_len); dynamic_check(resp->payload != NULL); dynamic_check(resp->payload <= &resp->payload[i]); dynamic_check(&resp->payload[i] < resp->payload + resp->payload_len resp->payload[i] = user_payload[i]; } return true; }
No Memory Disclosure
malloc now checked
Step 3: Compiler Inserts Checks Automatically
17
18
bool echo( int16_t user_length, size_t user_payload_len, array_ptr<char> user_payload : count(user_payload_len), ptr<resp_t> resp) { array_ptr<char> resp_data : count(user_length) = malloc(user_length); dynamic_check(resp != NULL); resp->payload = resp_data; resp->payload_len = user_length; dynamic_check(user_payload != NULL); dynamic_check(resp->payload != NULL); // memcpy(resp->payload, user_payload, user_length) for (size_t i = 0; i < user_length; i++) { dynamic_check(i <= user_payload_len); resp->payload[i] = user_payload[i]; } return true; }
No Memory Disclosure
malloc still checked
Step 4: Restrictions on bounds expressions may allow removal
19
void more(int *b, int idx, ptr<int *>out) { int oldidx = idx, c; do { c = readvalue(); b[idx++] = c; //could overflow b? } while (c != 0); *out = b+idx-oldidx; //bad if out corrupted }
internally safe. Made so by disallowing
safe and unsafe code
20
21
void foo(int *out) { _Ptr<int> ptrout; if (out != (int *)0) { ptrout = (_Ptr<int>)out; // cast OK } else { return; } checked { int b checked[5][5]; for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { b[i][j] = -1; // access safe } } *ptrout = b[0][0]; } }
unchecked code checked code
What assurance do checked regions give us?
region pointer accesses are safe
Proved this property in a simple formalization of the Checked C type system
22
Annotate Unchecked Pointers with Bounds, in code and in library function prototypes
23
T* val : itype(ptr<T>) = malloc(sizeof(T)); T* vals : count(n) = calloc(n, sizeof(T)); size_t fwrite(void *p : byte_count(s*n), size_t s, size_t n, FILE *st : itype(_Ptr<FILE>));
24
bounds inference, subsumption checking, dynamic check insertion
25
26
27
28
Benchmark LoC Description Olden: bh 1,162
Barnes & Hut N-body force computation
Olden: bisort 263
Forward & Backward Bitonic Sort
Olden: em3d 478
3D Electromagnetic Wave Propagation
Olden: health 389
Colombian Health Care Simulation
Olden: mst 328
Minimum Spanning Tree
Olden: perimeter 399
Perimeters of Regions on Images
Olden: power 458
Power Pricing Optimisation Solver
Olden: treeadd 180
Recursive Sum over Tree
Olden: tsp 420
Travelling Salesman Problem
Olden: voronoi 814
Computes Voronoi diagram of a set of points
Ptrdist: anagram 362
Finding Anagrams from a Dictionary
Ptrdist: bc 5,194
Arbitrary precision calculator
Ptrdist: ft 893
Minimum Spanning Tree using Fibonacci heaps
Ptrdist: ks 552
Schweikert-Kernighan Graph Partitioning
Ptrdist: yacr2 2,529
VLSI Channel Router
14.6% 14.6% 14.6% 14.6% 14.6% 14.6% 14.6% 14.6% 14.6% 14.6% 14.6% 14.6% 14.6%
yacr2 ks ft anagram tsp treeadd power perimeter mst health em3d bisort bh 0% 10% 20% 30%
Lines Modified (%) Benchmark 83.9% 83.9% 83.9% 83.9% 83.9% 83.9% 83.9% 83.9% 83.9% 83.9% 83.9% 83.9% 83.9%
yacr2 ks ft anagram tsp treeadd power perimeter mst health em3d bisort bh 0% 20% 40% 60% 80% 100%
Easy Modifications (%) 10.6% 10.6% 10.6% 10.6% 10.6% 10.6% 10.6% 10.6% 10.6% 10.6% 10.6% 10.6% 10.6%
yacr2 ks ft anagram tsp treeadd power perimeter mst health em3d bisort bh 0% 10% 20% 30%
Lines Unchecked (%) Benchmark Suite
Olden Ptrdist
29
+ 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2% + 8.2%
yacr2 ks ft anagram tsp treeadd power perimeter mst health em3d bisort bh −20% 0% + 20% + 40% + 60%
Runtime Slowdown (±%) Benchmark + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5% + 19.5%
yacr2 ks ft anagram tsp treeadd power perimeter mst health em3d bisort bh −25% 0% + 25% + 50% + 75%
Compile Time Slowdown (±%) + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3% + 6.3%
yacr2 ks ft anagram tsp treeadd power perimeter mst health em3d bisort bh −20% 0% + 20% + 40% + 60%
Executable Size Change (±%) Benchmark Suite
Olden Ptrdist
30
(flow-sensitive types)
framework
31
bounds declarations, not made “fat”
reason about mixed code
meantime
32
https://github.com/Microsoft/checkedc https://github.com/Microsoft/checkedc-clang