Meaningful Variable Names for Decompiled Code: A Machine Translation Approach
Alan Jaffe, Jeremy Lacomis, Edward J. Schwartz*, Claire Le Goues, and Bogdan Vasilescu *
Meaningful Variable Names for Decompiled Code: A Machine Translation Approach - PowerPoint PPT Presentation
Meaningful Variable Names for Decompiled Code: A Machine Translation Approach Alan Jaffe, Jeremy Lacomis, Edward J. Schwartz*, Claire Le Goues, and Bogdan Vasilescu * Problem: Obfuscated Variable Names in Code Minified JavaScript: function
Alan Jaffe, Jeremy Lacomis, Edward J. Schwartz*, Claire Le Goues, and Bogdan Vasilescu *
2
function callback(error, response, body) { if (!error && response.statusCode == 200) { var info = JSON.parse(body); … function callback(o, s, a) { if (!o && s.statusCode == 200) { var c = JSON.parse(a); …
Minified JavaScript:
3
function callback(error, response, body) { if (!error && response.statusCode == 200) { var info = JSON.parse(body); … function callback(o, s, a) { if (!o && s.statusCode == 200) { var c = JSON.parse(a); …
Minified JavaScript:
4
function callback(error, response, body) { if (!error && response.statusCode == 200) { var info = JSON.parse(body); … function callback(o, s, a) { if (!o && s.statusCode == 200) { var c = JSON.parse(a); … cp = buf; (void)asxTab(level + 1); for (n = asnContents(asn, buf, 512); n > 0; n--) { printf(" %02X ", *(cp++)); } v14 = &v15; asxTab(a2 + 1); for (v13 = asnContents(a1, &v15, 512LL); v13 > 0; --v13) { v9 = (unsigned char*)(v14++); printf(" %02X ", *v9); }
Minified JavaScript: Decompiled C Code:
5
function callback(error, response, body) { if (!error && response.statusCode == 200) { var info = JSON.parse(body); … function callback(o, s, a) { if (!o && s.statusCode == 200) { var c = JSON.parse(a); … cp = buf; (void)asxTab(level + 1); for (n = asnContents(asn, buf, 512); n > 0; n--) { printf(" %02X ", *(cp++)); } v14 = &v15; asxTab(a2 + 1); for (v13 = asnContents(a1, &v15, 512LL); v13 > 0; --v13) { v9 = (unsigned char*)(v14++); printf(" %02X ", *v9); }
Minified JavaScript: Decompiled C Code:
6
function callback(error, response, body) { if (!error && response.statusCode == 200) { var info = JSON.parse(body); … function callback(o, s, a) { if (!o && s.statusCode == 200) { var c = JSON.parse(a); …
Minified JavaScript:
7
function callback(error, response, body) { if (!error && response.statusCode == 200) { var info = JSON.parse(body); … function callback(o, s, a) { if (!o && s.statusCode == 200) { var c = JSON.parse(a); …
Minified JavaScript:
8
function callback(error, response, body) { if (!error && response.statusCode == 200) { var info = JSON.parse(body); … function callback(o, s, a) { if (!o && s.statusCode == 200) { var c = JSON.parse(a); …
Minified JavaScript:
9
cp = buf; (void)asxTab(level + 1); for (n = asnContents(asn, buf, 512); n > 0; n--) { printf(" %02X ", *(cp++)); } v14 = &v15; asxTab(a2 + 1); for (v13 = asnContents(a1, &v15, 512LL); v13 > 0; --v13) { v9 = (unsigned char*)(v14++); printf(" %02X ", *v9); }
Decompiled C Code:
10
11
12
13
$\operatorname*{argmax}_{e} P(e \mid f)$
14
$\operatorname*{argmax}_{e} P(e \mid f) = \operatorname*{argmax}_{e} \frac{P(f \mid e)\,P(e)}{P(f)} = \operatorname*{argmax}_{e} P(f \mid e)\,P(e)$
15
$\operatorname*{argmax}_{e} P(e \mid f) = \operatorname*{argmax}_{e} \frac{P(f \mid e)\,P(e)}{P(f)} = \operatorname*{argmax}_{e} P(f \mid e)\,P(e)$
Translation Model: Probability that f is a translation of e
16
$\operatorname*{argmax}_{e} P(e \mid f) = \operatorname*{argmax}_{e} \frac{P(f \mid e)\,P(e)}{P(f)} = \operatorname*{argmax}_{e} P(f \mid e)\,P(e)$
Language Model: “Fluency” of e
17
$\operatorname*{argmax}_{e} P(e \mid f) = \operatorname*{argmax}_{e} \frac{P(f \mid e)\,P(e)}{P(f)} = \operatorname*{argmax}_{e} P(f \mid e)\,P(e)$ $P(f \mid e)$: Translation Model $P(e)$: Language Model MOSES SMT:
18 Aligned French/English corpus English corpus
19 Aligned original/minified source corpus Original source corpus
21
cp = buf; (void)asxTab(level + 1); for (n = asnContents(asn, buf, 512); n > 0; n--) { printf(" %02X ", *(cp++)); } v14 = &v15; asxTab(a2 + 1); for (v13 = asnContents(a1, &v15, 512LL); v13 > 0; --v13) { v9 = (unsigned char*)(v14++); printf(" %02X ", *v9); }
Decompiled C Code:
22 Aligned original/decompiled source corpus Original source corpus
23 Aligned original/decompiled source corpus Original source corpus
24
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Source Decompiled Code
25
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Source Decompiled Code
26
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Source Decompiled Code
27
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Source Decompiled Code
28
#include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Decompiled Code
29 ❌
#include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Decompiled Code
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; }
Original Code
30
#include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
❌
#include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Decompiled Code
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; }
Original Code
31
#include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
❌
#include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Decompiled Code
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; }
Original Code
32
#include <stdio.h> int main() { int v1 = 0; int __; for (__ = 0; __ < 10; ++__) printf("%d\n", __); return v1; }
❌
#include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Decompiled Code
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; }
Original Code
33
#include <stdio.h> int main() { int v1 = 0; int cur; for (cur = 0; cur < 10; ++cur) printf("%d\n", cur); return v1; }
❌
int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Decompiled Code
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; }
Original Code Renamed Decompiled Code
36 Aligned renamed/decompiled source corpus Renamed source corpus
37
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Code Decompiled Code
38
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Code Decompiled Code
39
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Code Decompiled Code
40
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Code Decompiled Code
41
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Code Decompiled Code
42
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int v2; for (v2 = 0; v2 < 10; ++v2) printf("%d\n", v2); return v1; }
Original Code Decompiled Code
43
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int __; for (__ = 0; __ < 10; ++__) printf("%d\n", __); return v1; }
Original Code Decompiled Code
44
#include <stdio.h> int main() { int cur = 0; while (cur <= 9) { printf("%d\n", cur); ++cur; } return 0; } #include <stdio.h> int main() { int v1 = 0; int cur; for (cur = 0; cur < 10; ++cur) printf("%d\n", cur); return v1; }
Original Code Decompiled Code
45
46
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size)
Original
47
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size) my_rc base2_string(base2_handle a1, char* a2, size_t a3)
Original Decompiled
48
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size) my_rc base2_string(base2_handle a1, char* a2, size_t a3)
Original Decompiled
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
49
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size)
Original
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
50
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size)
Original
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
51
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size)
Original
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
52
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size)
Original
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
53
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size)
Original
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
54
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size)
Original
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
55
my_rc base2_string(base2_handle base2_h, char* buffer, size_t buffer_size) my_rc base2_string(base2_handle a1, char* a2, size_t a3)
Original Decompiled
my_rc base2_string(base2_handle base2_h, char* buf, size_t len)
Renamed Decompiled
asked to perform various maintenance tasks, graded and timed:
56
asked to perform various maintenance tasks, graded and timed:
57
1 int x = 1; 2 int y = 0; 3 while (x <= 5) { 4 y += 2; 5 x += 1; 6 } 7 printf("%d", y);
asked to perform various maintenance tasks, graded and timed:
58
1 int x = 1; 2 int y = 0; 3 while (x <= 5) { 4 y += 2; 5 x += 1; 6 } 7 printf("%d", y);
significantly lower than when using the decompiler names.
System Architecture
45
59