Parsing JSON Really Quickly: Lessons Learned - PowerPoint PPT Presentation
Parsing JSON Really Quickly: Lessons Learned - PowerPoint PPT Presentation
Parsing JSON Really Quickly: Lessons Learned. Daniel Lemire, blog: https://lemire.me, twitter: @lemire, GitHub: https://github.com/lemire/, profe...
Parsing JSON Really Quickly: Lessons Learned
Daniel Lemire, blog: https://lemire.me, twitter: @lemire, GitHub: https://github.com/lemire/, professor (Computer Science) at Université du Québec (TÉLUQ), Montreal [slide 2] How fast can you read a large file?
Are you limited by your disk, or are you limited by your CPU?
[slide 3] An iMac disk: 2.2 GB/s. Faster SSDs (e.g., 5 GB/s) are available.
[slide 4] Reading text lines (CPU only): ~0.6 GB/s on 3.4 GHz Skylake in Java
void parseLine(String s) { volume += s.length(); } void readString(StringReader data) { BufferedReader bf = new BufferedReader(data); bf.lines().forEach(s -> parseLine(s)); }
Source available. Improved by JDK-8229022.
Reading text lines (CPU only)
~1.5 GB/s on 3.4 GHz Skylake in C++ (GNU GCC 8.3)
size_t sum_line_lengths(char * data, size_t length) { std::stringstream is; is.rdbuf()->pubsetbuf(data, length); std::string line; size_t sumofalllinelengths{0}; while(getline(is, line)) { sumofalllinelengths += line.size(); } return sumofalllinelengths; }
Source available. [slide 6] JSON
Specified by Douglas Crockford. RFC 7159 by Tim Bray in 2013. Ubiquitous format to exchange data.
{"Image": {"Width": 800,"Height": 600, "Title": "View from 15th Floor", "Thumbnail": { "Url": "http://www.example.com/81989943", "Height": 125,"Width": 100} }
[slide 8] twitter.json: 0.35 GB/s on 3.4 GHz Skylake
Source code available. Speed: Jackson (Java) - 0.35 GB/s
- 1.5 GB/s
twitter.json: 0.650 GB/s on 3.4 GHz Skylake
Speed: RapidJSON (C++) - 0.65 GB/s
- 0.35 GB/s
- 1.5 GB/s
simdjson speed (C++)
twitter.json: 2.4 GB/s on 3.4 GHz Skylake. Speed: simdjson (C++) - 2.4 GB/s; RapidJSON (C++) - 0.65 GB/s
- 0.35 GB/s
- 1.5 GB/s
2.4 GB/s on a 3.4 GHz (+turbo) processor is ~1.5 cycles per input byte
[slide 14] Trick #1: avoid hard-to-predict branches
[slide 15] while (howmany != 0) {
out[index] = random();
index += 1; howmany--; }
e.g., ~3 cycles per iteration [slide 16] while (howmany != 0) { val = random(); if( val is odd) { // <=== new
out[index] = val;
index += 1; } howmany--; }
[slide 17] From 3 cycles to 15 cycles per value!
[slide 18] Go branchless!
while (howmany != 0) { val = random();
out[index] = val;
index += (val bitand 1); howmany--; }
back to under 4 cycles! Details and code available. [slide 19] What if I keep running the same benchmark?
(same pseudo-random integers from run-to-run) [slide 20] Trick #2: Use wide "words"
Don't process byte by byte
[slide 21] When possible, use SIMD
Available on most commodity processors (ARM, x64). Originally added (Pentium) for multimedia (sound). Add wider (128-bit, 256-bit, 512-bit) registers. Adds new fun instructions: do 32 table lookups at once. [slide 22] -512
Trick #3: avoid memory/object allocation
[slide 25] Trick #4: measure the performance!
benchmark-driven development [slide 27] Continuous Integration Performance tests
performance regression is a bug that should be spotted early [slide 28] Processor frequencies are not constant
Especially on laptops. CPU cycles different from time. Time can be noisier than CPU cycles. [slide 29] Specific examples
[slide 30] Example 1. UTF-8
Strings are ASCII (1 byte per code point). Otherwise multiple bytes (2, 3 or 4). Only 1.1 M valid UTF-8 code points. [slide 31]
if (byte1 < 0x80) { return true; // ASCII } if (byte1 < 0xE0) { if (byte1 < 0xC2 || byte2 > 0xBF) { return false; } } else if (byte1 < 0xF0) { // Three-byte form. if (byte2 > 0xBF || (byte1 == 0xE0 && byte2 < 0xA0) || (byte1 == 0xED && 0xA0 <= byte2) blablabla ) blablabla } else { // Four-byte form. .... blabla }
[slide 32] Using SIMD
Load 32-byte registers. Use ~20 instructions. No branch, no branch misprediction. [slide 33] _mm256_subs_epu8(current_bytes, 244 );
One instruction, checks 32 bytes at once! [slide 34] processing random UTF-8
cycles/byte: branching 11, simdjson 0.5
20x faster!
Source code available. [slide 35] Example 2. Classifying characters
comma (0x2c): ","; colon (0x3a): ":"; brackets (0x5b, 0x5d, 0x7b, 0x7d): [, ], {, }; white-space (0x09, 0x0a, 0x0d, 0x20); others
ARM NEON and x64 processors have instructions to look up 16-byte tables in a vectorized manner (16 values at a time): pshufb, tbl
[slide 38] H1(low(c)) & H2(high(c))
comma (0x2c): 1; colon (0x3a): 2; brackets (0x5b, 0x5d, 0x7b, 0x7d): 4; most white-space (0x09, 0x0a, 0x0d): 8; white space (0x20): 16; others: 0
const uint8x16_t low_nibble_mask = (uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0}; const uint8x16_t high_nibble_mask = (uint8x16_t){8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0}; const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf);
Five instructions:
uint8x16_t nib_lo = vandq_u8(chunk, low_nib_and_mask); uint8x16_t nib_hi = vshrq_n_u8(chunk, 4); uint8x16_t shuf_lo = vqtbl1q_u8(low_nibble_mask, nib_lo); uint8x16_t shuf_hi = vqtbl1q_u8(high_nibble_mask, nib_hi); return vandq_u8(shuf_lo, shuf_hi);
[slide 41] Example 3. Detecting escaped characters
" \ " \ \ \ \ " \ \ \ "
[slide 42] Can you tell where the strings start and end?
{ "\\\"Nam[{": [ 116,"\\\\"
... Without branching?
[slide 43] Escape characters follow an odd sequence of backslashes!
[slide 44]
{ "\\\"Nam[{": [ 116,"\\\\"
___111________________1111_ : B
Odd and even positions
1_1_1_1_1_1_1_1_1_1_1_1_1_1 : E (constant)
_1_1_1_1_1_1_1_1_1_1_1_1_1_ : O (constant)
[slide 45] (((B + (B &~(B << 1)& E))& ~B)& ~E) | (((B + ((B &~(B << 1))& O))& ~B)& E)
Result:
{ "\\\"Nam[{": [ 116,"\\\\" ...
______1____________________
No branch! [slide 46] Remove the escaped quotes, and
the remaining quotes tell you where the strings are!
[slide 47]
{ "\\\"Nam[{": [ 116,"\\\\"
__1___1_____1________1____1 : all quotes
______1____________________ : escaped quotes
__1_________1________1____1 : string-delimiter quotes
[slide 48] Find the span of the string
mask = quote xor (quote << 1); mask = mask xor (mask << 2); mask = mask xor (mask << 4); mask = mask xor (mask << 8); mask = mask xor (mask << 16); ...
__1_________1________1____1 (quotes)
becomes
__1111111111_________11111_ (string region)
[slide 49] Entire structure of the JSON document can be identified (as a bitset) without any branch!
[slide 50] Example 4. Decode bit indexes
Given the bitset 1000100010001, we want the location of the 1s (e.g., 0, 4, 8, 12) [slide 51]
while (word != 0) { result[i] = trailingzeroes(word); word = word & (word - 1); i++; }
If number of 1s per 64-bit is hard to predict: lots of mispredictions!!! [slide 52]
while (word != 0) { result[i] = trailingzeroes(word); word = word & (word - 1); result[i+1] = trailingzeroes(word); word = word & (word - 1); result[i+2] = trailingzeroes(word); word = word & (word - 1); result[i+3] = trailingzeroes(word); word = word & (word - 1); i+=4; }
Discard bogus indexes by counting the number of 1s in the word directly (e.g., bitCount) [slide 54] Example 5. Number parsing is expensive
strtod: 90 MB/s, 38 cycles per byte, 10 branch misses per floating-point number [slide 55]
bool is_made_of_eight_digits_fast(const char *chars) { uint64_t val; memcpy(&val, chars, 8); return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); }
[slide 56]
uint32_t parse_eight_digits_unrolled(const char *chars) { uint64_t val; memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return (val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32; }
Can do even better with SIMD [slide 57] Runtime dispatch
On first call, pointer checks CPU, and reassigns itself. No language support. [slide 58]
int json_parse_dispatch(...) { Architecture best_implementation = find_best_supported_implementation(); // Selecting the best implementation switch (best_implementation) { case Architecture::HASWELL: json_parse_ptr = &json_parse_implementation<Architecture::HASWELL>; break; case Architecture::WESTMERE: json_parse_ptr= &json_parse_implementation<Architecture::WESTMERE>; break; default: return UNEXPECTED_ERROR; } return json_parse_ptr(....); }
5 9- l f, T
Wojciech Muła, Georgios Floros, Dong Xie, Nan Xiao, Egor
Wouter Bolsterlee, Anish Karandikar
Tom Dyson, Ihor Dotsenko, Alexey Milovidov