 
              Coverage-Guided Fuzzing Dynamic Static Smart Coverage Structure Algorithms Security Testing Andreas Zeller, Saarland University
Our Goal • We want to cause the program to fail • We have seen • random (unstructured) input • structured (grammar-based) input • generation based on grammar coverage
A Challenge class Roots { // Solve ax² + bx + c = 0 public roots(double a, double b, double c) { … } // Result: values for x double root_one, root_two; } • Which values for a, b, c should we test? Assuming a, b, c were 32-bit integers, we’d have (2^32)^3 ≈ 10^28 legal inputs; even with 1,000,000,000,000 (10^12) tests/s, we would still require 2.5 billion years
The Code // Solve ax² + bx + c = 0 public roots(double a, double b, double c) { double q = b * b - 4 * a * c; if (q > 0 && a ≠ 0) { // code for handling two roots ← Test this case } else if (q == 0) { // code for handling one root ← and this } else { // code for handling no roots ← and this! } }
The Test Cases // Solve ax² + bx + c = 0 public roots(double a, double b, double c) { double q = b * b - 4 * a * c; if (q > 0 && a ≠ 0) { // code for handling two roots ← Test this case: (a, b, c) = (3, 4, 1) } else if (q == 0) { // code for handling one root ← and this: (a, b, c) = (0, 0, 1) } else { // code for handling no roots ← and this!: (a, b, c) = (3, 2, 1) } }
A Defect // Solve ax² + bx + c = 0 public roots(double a, double b, double c) { double q = b * b - 4 * a * c; if (q > 0 && a ≠ 0) { // code for handling two roots } else if (q == 0) { x = (-b) / (2 * a); // ↯ fails for (a, b, c) = (0, 0, 1): code must handle a = 0 } else { // code for handling no roots } }
The Idea Use the program to guide test generation
The Ingredients Dynamic Static Smart Coverage Structure Algorithms
The Ingredients Dynamic Static Smart Coverage Structure Algorithms
Expressing Structure // Solve ax² + bx + c = 0 public roots(double a, double b, double c) { double q = b * b - 4 * a * c; if (q > 0 && a ≠ 0) { // code for handling two roots } else if (q == 0) { x = (-b) / (2 * a); } else { // code for handling no roots } }
Control Flow Graph • A control flow graph expresses paths of program execution • Nodes are basic blocks – sequences of statements with one entry and one exit point • Edges represent control flow – the possibility that the program execution proceeds from the end of one basic block to the beginning of another [CFG nodes shown: public roots(double a, double b, double c) | double q = b * b - 4 * a * c; | q > 0 && a != 0 | // code for two roots | q == 0 | // code for one root | // code for no roots | return]
Structural Testing • The CFG can serve as an adequacy criterion for test cases • The more parts are covered (executed), the higher the chance of a test to uncover a defect • “parts” can be: nodes, edges, paths, conditions… [CFG nodes shown: public roots(double a, double b, double c) | double q = b * b - 4 * a * c; | q > 0 && a != 0 | // code for two roots | q == 0 | // code for one root | // code for no roots | return]
Control Flow Patterns • while: while ( COND ) BODY ; [CFG nodes: COND, BODY] • do: do { BODY } while ( COND ); [CFG nodes: BODY, COND] • for: for ( INIT; COND; INCR ) BODY ; [CFG nodes: INIT, COND, BODY, INCR] • if: if ( COND ) THEN-BLOCK; else ELSE-BLOCK ; [CFG nodes: COND, THEN-BLOCK, ELSE-BLOCK]
cgi_decode
/**
 * @title cgi_decode
 * @desc
 * Translate a string from the CGI encoding to plain ascii text
 * '+' becomes space, %xx becomes byte with hex value xx,
 * other alphanumeric characters map to themselves
 *
 * returns 0 for success, positive for erroneous input
 * 1 = bad hexadecimal digit
 */
int cgi_decode(char *encoded, char *decoded)
{
    /* A */ char *eptr = encoded;
            char *dptr = decoded;
            int ok = 0;
    /* B */ while (*eptr) /* loop to end of string ('\0' character) */
    {
            char c;
    /* C */ c = *eptr;
            if (c == '+') { /* '+' maps to blank */
    /* E */     *dptr = ' ';
    /* D */ } else if (c == '%') { /* '%xx' is hex for char xx */
    /* G */     int digit_high = Hex_Values[*(++eptr)];
                int digit_low = Hex_Values[*(++eptr)];
                if (digit_high == -1 || digit_low == -1)
    /* I */         ok = 1; /* Bad return code */
                else
    /* H */         *dptr = 16 * digit_high + digit_low;
            } else { /* All other characters map to themselves */
    /* F */     *dptr = *eptr;
            }
    /* L */ ++dptr; ++eptr;
    }
    /* M */ *dptr = '\0'; /* Null terminator for string */
            return ok;
}
int cgi_decode(char *encoded, char *decoded) A A { char *eptr = encoded; char *dptr = decoded; int ok = 0; B while (*eptr) { B False True C char c; C c = *eptr; if (c == '+') { False True D D E E *dptr = ' '; elseif (c == '%') { } True False else F G F G int digit_high = Hex_Values[*(++eptr)]; *dptr = *eptr; int digit_low = Hex_Values[*(++eptr)]; } if (digit_high == -1 || digit_low == -1) { False True H I H I else { ok = 1; } *dptr = 16 * digit_high + digit_low; } L ++dptr; L ++eptr; M M *dptr = '\0'; } return ok; }
int cgi_decode(char *encoded, char *decoded) “test” A ✔ A { char *eptr = encoded; char *dptr = decoded; int ok = 0; B ✔ while (*eptr) { B False True C ✔ char c; C c = *eptr; if (c == '+') { False True D E D E ✔ *dptr = ' '; elseif (c == '%') { } False True else F F G G ✔ int digit_high = Hex_Values[*(++eptr)]; *dptr = *eptr; int digit_low = Hex_Values[*(++eptr)]; } if (digit_high == -1 || digit_low == -1) { True False H I H I ok = 1; else { } *dptr = 16 * digit_high + digit_low; } ✔ L ++dptr; L ++eptr; M ✔ *dptr = '\0'; M } return ok; }
int cgi_decode(char *encoded, char *decoded) “test” “a+b” A ✔ A { char *eptr = encoded; char *dptr = decoded; int ok = 0; B ✔ while (*eptr) { B False True C ✔ char c; C c = *eptr; if (c == '+') { False True D D E E ✔ ✔ *dptr = ' '; elseif (c == '%') { } True False else F G ✔ F G int digit_high = Hex_Values[*(++eptr)]; *dptr = *eptr; int digit_low = Hex_Values[*(++eptr)]; } if (digit_high == -1 || digit_low == -1) { False True H I H I else { ok = 1; } *dptr = 16 * digit_high + digit_low; } ✔ L ++dptr; L ++eptr; M ✔ M *dptr = '\0'; } return ok; }
int cgi_decode(char *encoded, char *decoded) “test” “a+b” A ✔ A { char *eptr = encoded; “%3d” char *dptr = decoded; int ok = 0; B ✔ while (*eptr) { B False True C ✔ char c; C c = *eptr; if (c == '+') { False True D D E E ✔ ✔ *dptr = ' '; elseif (c == '%') { } True False else F G ✔ F ✔ G int digit_high = Hex_Values[*(++eptr)]; *dptr = *eptr; int digit_low = Hex_Values[*(++eptr)]; } if (digit_high == -1 || digit_low == -1) { False True H I ✔ H I else { ok = 1; } *dptr = 16 * digit_high + digit_low; } ✔ L ++dptr; L ++eptr; M ✔ M *dptr = '\0'; } return ok; }
int cgi_decode(char *encoded, char *decoded) “test” “a+b” A ✔ A { char *eptr = encoded; “%3d” char *dptr = decoded; int ok = 0; “%g” B ✔ while (*eptr) { B False True C ✔ char c; C c = *eptr; if (c == '+') { False True D D E E ✔ ✔ *dptr = ' '; elseif (c == '%') { } True False else F G ✔ F ✔ G int digit_high = Hex_Values[*(++eptr)]; *dptr = *eptr; int digit_low = Hex_Values[*(++eptr)]; } if (digit_high == -1 || digit_low == -1) { False True H I ✔ ✔ H I else { ok = 1; } *dptr = 16 * digit_high + digit_low; } ✔ L ++dptr; L ++eptr; M ✔ M *dptr = '\0'; } return ok; }
Test Adequacy Criteria • How do we know a test suite is "good enough"? • A test adequacy criterion is a predicate that is true or false for a pair ⟨ program, test suite ⟩ • Usually expressed in form of a rule – e.g., "all statements must be covered"
Statement Testing • Adequacy criterion: each statement (or node in the CFG) must be executed at least once • Rationale: a defect in a statement can only be revealed by executing the defect • Coverage: $\dfrac{\#\,\text{executed statements}}{\#\,\text{statements}}$
Recommend
More recommend