Timotej Kapus Cristian Cadar Imperial College London 1 Symbolic - - PowerPoint PPT Presentation

timotej kapus cristian cadar imperial college london
SMART_READER_LITE
LIVE PREVIEW

Timotej Kapus Cristian Cadar Imperial College London 1 Symbolic - - PowerPoint PPT Presentation

Timotej Kapus Cristian Cadar Imperial College London 1 Symbolic Execution Program analysis technique Active research area Used in industry IntelliTest, SAGE Angr KLOVER 2 Why symbolic execution? No


slide-1
SLIDE 1

Timotej Kapus Cristian Cadar Imperial College London

1

slide-2
SLIDE 2

Symbolic Execution

2

  • Program analysis technique
  • Active research area
  • Used in industry

○ IntelliTest, SAGE ○ KLOVER

Angr

slide-3
SLIDE 3

Why symbolic execution?

3

  • No false-positives!

○ Every bug found has a concrete input triggering it

  • Can interact with the environment

○ I/O, unmodeled libraries

  • Only relevant code is executed “symbolically”; the rest is fast “native” execution

slide-4
SLIDE 4

Why (not) symbolic execution?

4

  • Scalability, scalability, scalability

○ Constraint solving is hard ○ Path explosion

slide-5
SLIDE 5

This talk

5

Show a segmented memory model that tackles path explosion due to dereferences of symbolic pointers through the use of static pointer alias analysis

slide-6
SLIDE 6

1D symbolic pointers

int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if (vector[i] > 8) printf("big element\n"); else printf("small element");

6

slide-7
SLIDE 7

1D Symbolic pointers

int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if(vector[i] > 8) printf("big element\n"); else printf("small element");

7

i = symbolic

slide-8
SLIDE 8

1D Symbolic pointers

int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if(vector[i] > 8) printf("big element\n"); else printf("small element");

8

i = symbolic vector = {1,2,...}

slide-9
SLIDE 9

1D Symbolic pointers

int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if(vector[i] > 8) printf("big element\n"); else printf("small element");

9

i = symbolic

vector[i] > 8

vector = {1,2,...}

slide-10
SLIDE 10

1D Symbolic pointers

int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if(vector[i] > 8) printf("big element\n"); else printf("small element");

10

i = symbolic

vector[i] > 8 printf("big element\n");

vector = {1,2,...}

slide-11
SLIDE 11

1D Symbolic pointers

int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if(vector[i] > 8) printf("big element\n"); else printf("small element");

11

i = symbolic

vector[i] > 8 printf("big element\n"); printf("small element");

vector = {1,2,...}

slide-12
SLIDE 12

char i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if(vector[i] > 8) printf("big element\n"); else printf("small element");

12

i = symbolic

printf("big element\n");

1D Symbolic pointers

vector[i] > 8 printf("small element");

  • vector[i] is a dereference of a symbolic pointer

○ Concrete base address ○ Some symbolic offset i

  • I.e., if vector is at 0xdeedbeef, then vector[i] is a load from (0xdeedbeef + i)

vector = {1,2,...}

slide-13
SLIDE 13

Constraints over memory

13

  • Theory of arrays:

○ read: array × index → value ○ write: array × index × value → array

○ read(write(a, p, v), r) = v if p = r ○ read(write(a, p, v), r) = read(a, r) if p ≠ r

  • Simply map C arrays to solver arrays
  • Use concrete addresses to resolve C arrays to solver arrays
slide-14
SLIDE 14

1D Symbolic pointers: constraints in theory of arrays

i = symbolic int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if (vector[i] > 8) printf("big element"); else printf("small element");

14

slide-15
SLIDE 15

1D Symbolic pointers: constraints in theory of arrays

i = symbolic int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if (vector[i] > 8) printf("big element"); else printf("small element"); array vector[10] = [1 2 3 4 5 6 7 8 9 10]

15

slide-16
SLIDE 16

1D Symbolic pointers: constraints in theory of arrays

i = symbolic int i; make_symbolic(i); int vector[10] = {1,2,3,4,5,6,7,8,9,10}; if (vector[i] > 8) printf("big element"); else printf("small element"); array vector[10] = [1 2 3 4 5 6 7 8 9 10]

(Read i vector)

16

slide-17
SLIDE 17

2D Symbolic pointers

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, sizeof(int)); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

17

slide-18
SLIDE 18

2D Symbolic pointers: constraints in theory of arrays

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

18

slide-19
SLIDE 19

2D Symbolic pointers: constraints in theory of arrays

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

array matrix[3] = [0xdeedbeef 0xdeedbef0 0xdeedbef1]

19

slide-20
SLIDE 20

2D Symbolic pointers: constraints in theory of arrays

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

array matrix[3] = [0xdeedbeef 0xdeedbef0 0xdeedbef1]

array matrix_0[3] = [0 0 0] array matrix_1[3] = [0 0 42] array matrix_2[3] = [0 0 0]

20

slide-21
SLIDE 21

2D Symbolic pointers: constraints in theory of arrays

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

array matrix[3] = [0xdeedbeef 0xdeedbef0 0xdeedbef1]

array matrix_0[3] = [0 0 0] array matrix_1[3] = [0 0 42] array matrix_2[3] = [0 0 0]

(Read i matrix)

21

slide-22
SLIDE 22

2D Symbolic pointers: constraints in theory of arrays

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

array matrix[3] = [0xdeedbeef 0xdeedbef0 0xdeedbef1]

array matrix_0[3] = [0 0 0] array matrix_1[3] = [0 0 42] array matrix_2[3] = [0 0 0]

(Read j (Read i matrix))

22

slide-23
SLIDE 23

2D Symbolic pointers: constraints in theory of arrays

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

array matrix[3] = [0xdeedbeef 0xdeedbef0 0xdeedbef1]

array matrix_0[3] = [0 0 0] array matrix_1[3] = [0 0 42] array matrix_2[3] = [0 0 0]

(Read j 0xdeedbeef)

23

slide-24
SLIDE 24

2D Symbolic pointers: constraints in theory of arrays

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

array matrix[3] = [0xdeedbeef 0xdeedbef0 0xdeedbef1]

array matrix_0[3] = [0 0 0] array matrix_1[3] = [0 0 42] array matrix_2[3] = [0 0 0]

(Read j 0xdeedbeef)

24

slide-25
SLIDE 25

So what now?

  • Forking (KLEE)

○ Concretize and fork for each possible value of matrix[i]

  • State Merging / OR Expression (SAGE)

○ Create a disjunction over all possible values of matrix[i]

  • Flat Memory (considered by EXE, not implemented)

○ Have the whole memory as a single array

25

slide-26
SLIDE 26

2D Symbolic pointers: Forking

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = 0 j = symbolic

26

slide-27
SLIDE 27

2D Symbolic pointers: Forking

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = 0 j = symbolic

array matrix_0[3] = [0 0 0]

27

slide-28
SLIDE 28

2D Symbolic pointers: Forking

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = 0 j = symbolic

array matrix_0[3] = [0 0 0]

(Read j matrix_0)

28

slide-29
SLIDE 29

2D Symbolic pointers: Forking

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = 2 j = symbolic

array matrix_2[3] = [0 0 0]

29

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero"); int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

(Read j matrix_2)

slide-30
SLIDE 30

Path explosion

30

slide-31
SLIDE 31

2D Symbolic pointers: State Merging

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = 0 ∨ 1 ∨ 2 j = symbolic

31

slide-32
SLIDE 32

2D Symbolic pointers: State Merging

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = 0 ∨ 1 ∨ 2 j = symbolic

32

array matrix_0[3] = [0 0 0] array matrix_1[3] = [0 0 42] array matrix_2[3] = [0 0 0]

slide-33
SLIDE 33

2D Symbolic pointers: State Merging

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = 0 ∨ 1 ∨ 2 j = symbolic

(Read j matrix_0) ∨ (Read j matrix_1) ∨ (Read j matrix_2)

33

array matrix_0[3] = [0 0 0] array matrix_1[3] = [0 0 42] array matrix_2[3] = [0 0 0]

slide-34
SLIDE 34

OR expressions are hard(-er) to solve

34

slide-35
SLIDE 35

2D Symbolic pointers: Flat memory

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

35

slide-36
SLIDE 36

2D Symbolic pointers: Flat memory

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

36

array memory[12] = [3 6 9 0 0 0 0 0 42 0 0 0]

slide-37
SLIDE 37

2D Symbolic pointers: Flat memory

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

37

array memory[12] = [3 6 9 0 0 0 0 0 42 0 0 0]

Note that calloc now returns 3, 6, and 9 as the addresses of the rows

slide-38
SLIDE 38

2D Symbolic pointers: Flat memory

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

i = symbolic j = symbolic

(Read (3*i + j + 3) memory)

38

array memory[12] = [3 6 9 0 0 0 0 0 42 0 0 0]

slide-39
SLIDE 39

Unnecessarily large array

39

slide-40
SLIDE 40

Our approach

  • Use static pointer alias analysis
  • Partition memory objects into segments

○ Each pointer only points to a single segment

  • Assign segments to solver arrays

40

slide-41
SLIDE 41

Our approach: partitioning into segments

41

pts(p1) = {A, B}

slide-42
SLIDE 42

Our approach: partitioning into segments

42

p1 A B pts(p1) = {A, B}

slide-43
SLIDE 43

Our approach: partitioning into segments

43

p1 A B pts(p1) = {A, B} pts(p2) = {B, C}

slide-44
SLIDE 44

Our approach: partitioning into segments

44

p1 p2 A B C pts(p1) = {A, B} pts(p2) = {B, C}

slide-45
SLIDE 45

Our approach: partitioning into segments

45

p1 p2 A B C pts(p1) = {A, B} pts(p2) = {B, C}

slide-46
SLIDE 46

2D Symbolic pointers: Segmented Memory

i = symbolic j = symbolic

46

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

slide-47
SLIDE 47

2D Symbolic pointers: Segmented Memory

i = symbolic j = symbolic

47

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

array segment_0[3] = [0xdeedbef0 0xdeedbef3 0xdeedbef6] array segment_1[9] = [0 0 0 0 0 42 0 0 0]

slide-48
SLIDE 48

2D Symbolic pointers: Segmented Memory

array segment_0[3] = [0xdeedbef0 0xdeedbef3 0xdeedbef6] array segment_1[9] = [0 0 0 0 0 42 0 0 0]

i = symbolic j = symbolic

48

int i, j; make_symbolic(i, j); int *matrix[3]; for (int k = 0; k < 3; k++) matrix[i] = calloc(3, 4); matrix[1][2] = 42; if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

(Read (3*i + j) segment_1)

slide-49
SLIDE 49

Results

  • Based on an implementation in KLEE
  • Synthetic benchmarks

○ Based on the matrix example ○ Time it takes symbolic execution to explore all paths ○ Increase N - the dimensionality of the matrix

  • Real programs

49

make m4

slide-50
SLIDE 50

NxN matrix: single lookup extra allocation

int i, j; make_symbolic(i, j); int *matrix[N]; for (int k = 0; k < N; k++) matrix[i] = calloc(N, sizeof(int)); matrix[1][2] = 42; malloc(30000); //extra allocation if (matrix[i][j] > 8) printf("big element\n"); else printf("zero");

50

slide-51
SLIDE 51

NxN matrix: single lookup extra allocation

51

slide-52
SLIDE 52

Real programs experiment setup

  • We first look at cases that benefit from the segmented memory model

○ Hash tables ○ Deep in the search space

  • Targeted input files
  • 2 hour timeout
  • DFS, BFS, default

52

1 define(`A', `l') 2 define(`P', 2) 3 ? 4 ? 5 ifelse(?, P, eval(1 + P))

Targeted input file for m4

slide-53
SLIDE 53

m4 DFS

53

slide-54
SLIDE 54

m4 BFS

54

slide-55
SLIDE 55

m4 default

55

slide-56
SLIDE 56

make DFS

56

slide-57
SLIDE 57

Segmented memory model without symbolic dereferences

  • 105 coreutils

○ No symbolic dereferences

  • 1 hour run with DFS and forking model
  • Segmented memory model:

○ 18 coreutils timed out in 1h 20min ○ Remaining coreutils on average 4% slower

  • We envision using this after running the forking model

57

slide-58
SLIDE 58

Conclusion

  • Symbolic pointers are a hard problem

○ 3 existing options: forking, flat memory, merging

  • Novel approach: Segmented memory model

○ Builds on flat memory model ○ Uses pointer alias analysis ○ Faster on programs with symbolic pointer dereferences

58

slide-59
SLIDE 59

Interested? Looking for a Postdoc?

c.cadar@imperial.ac.uk srg.doc.ic.ac.uk/vacancies/

59

slide-60
SLIDE 60

60

slide-61
SLIDE 61

NxN matrix: single lookup

61

slide-62
SLIDE 62

Symbolic execution example: get_sign

62

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

slide-63
SLIDE 63

63

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x);

slide-64
SLIDE 64

64

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); r = -1;

slide-65
SLIDE 65

65

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 r = -1;

slide-66
SLIDE 66

66

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x < 1 r = -1;

slide-67
SLIDE 67

67

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x < 1 x ≠ 0 r = -1; return r;

slide-68
SLIDE 68

68

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x < 1 x = 0 x ≠ 0 r = -1; return r; r = 0;

slide-69
SLIDE 69

69

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x < 1 x = 0 x ≠ 0 r = -1; return r; r = 0; return r;

slide-70
SLIDE 70

70

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x ≥ 1 x < 1 x = 0 x ≠ 0 r = -1; r = 1; return r; r = 0; return r;

slide-71
SLIDE 71

71

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x == 0 x ≥ 1 x < 1 x = 0 x ≠ 0 r = -1; r = 1; return r; r = 0; return r;

slide-72
SLIDE 72

72

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x == 0 x ≥ 1 x < 1 x ≠ 0 x = 0 x ≠ 0 r = -1; r = 1; return r; return r; r = 0; return r;

slide-73
SLIDE 73

73

int get_sign(int x) { int r = -1; if (x >= 1) r = 1; if (x == 0) r = 0; return r; }

get_sign(x); x >= 1 x == 0 x == 0 x ≥ 1 x < 1 x = 0 x ≠ 0 x = 0 x ≠ 0 r = -1; r = 1; return r; return r; r = 0; return r;