SLIDE 22 The compiled sum reduction (parallelizing compiler).
sum:
        cmpq    $2, %rsi                ; if (n>2) goto .L1
        ja      .L1
        movq    (%rdi), %rax            ; rax = t[0]
        jb      .L2                     ; if (n<2) goto .L2
        addq    8(%rdi), %rax           ; rax = t[0] + t[1]
.L2:
        endfork                         ; return(rax)
.L1:                                    ; at fork, rsp, rbp, rdi, rsi and rbx are copied
        subq    $8, %rsp                ; alloc(temp)
        movq    %rsi, %rbp              ; rbp = n
        shrq    %rsi                    ; n = n/2
        fork    sum                     ; rax = sum(t, n/2)
        movq    %rax, 0(%rsp)           ; temp = t[0] + ... + t[n/2-1]
        leaq    (%rdi, %rsi, 8), %rdi   ; t = t + n/2*8 = &(t[n/2])
        subq    %rsi, %rbp              ; rbp = n - n/2
        movq    %rbp, %rsi              ; n = n - n/2
        fork    sum                     ; rax = sum(&(t[n/2]), n-n/2)
        addq    0(%rsp), %rax           ; rax = t[0] + ... + t[n/2-1]
                                        ;     + t[n/2] + ... + t[n-1]
        addq    $8, %rsp                ; free(temp)
        endfork                         ; return(rax)
11 / 33