C a n F P G A s c
- m
p e t e w i t h G P U s ?
J
- h
n R
- m
e i n , B r a m V e e n b
- e
r
G P U T e c h n
- l
- g
y C
- n
f e r e n c e ( G T C ’ 1 9 ) M a r c h 1 8
- 2
1 , 2 1 9
C a n F P G A s c o m p e t e w i t h G P U s - - PowerPoint PPT Presentation
G P U T e c h n o l o g y C o n f e r e n c e ( G T C 1 9 ) M a r c h 1 8 - 2 1 , 2 0 1 9 C a n F P G A s c o m p e t e w i t h G P U s ? J o h n R o m e i n , B r
G P U T e c h n
y C
f e r e n c e ( G T C ’ 1 9 ) M a r c h 1 8
1 , 2 1 9
– h
– p
– c
– l
– m
– r
– m
– l
– t
– c
– i
– p
– V
– d
➔ s
➔ a
C.real += A.real * B.real; C.real += -A.imag * B.imag; C.imag += A.real * B.imag; C.imag += A.imag * B.real;
B A A C
r
C
i r i
B
r i
C
r
C
i
F P U F P U F P U F P U
c h a n n e l d e v i c e me mo r y k e r n e l _ 1 k e r n e l _ 1 k e r n e l _ 2 k e r n e l _ 2
1 2 # p r a g ma u n r
l a [ i ] = i ; f
( i n t i = ; i < 3 ; i + + )
B A A C
r
C
i r i
B
r i
C
r
C
i
F P U F P U F P U F P U
init_once(); for (int i = 0; i < 100000; i ++) do_many_times(); for (int i = 256; i > 0; i --) a[i] = a[i – 1];
m a x
– U
m a x
– T
r e p e a t r e p e a t r e
d e r r e p e a t r e
d e r r e p e a t r e
d e r r e a d A ma t r i x r e
d e r
P E P E P E P E P E P E P E P E P E P E P E P E P E P E P E P E
r e a d B ma t r i x r e p e a t r e
d e r r e p e a t r e
d e r r e p e a t r e
d e r r e p e a t r e
d e r c
l e c t c
l e c t c
l e c t w r i t e C ma t r i x c
l e c t
#pragma parallel for s = 1...S : complex<float> subgrid[P ][N ×N ]; for i = 1...N ×N : float offset = compute_offset(s, i); for t = 1...T : float index = compute_index(s, i, t); for c = 1...C : float scale = scales[c]; float phase = offset - (index × scale); complex<float> phasor = {cos(phase), sin(phase)}; #pragma unroll for p = 1...P : // 4 polarizations complex<float> visibility = visibilities[t][c][p]; subgrid[p][i] += cmul(phasor, visibility); apply_aterm(subgrid); apply_taper(subgrid); apply_ifft(subgrid); store(subgrid);
– U
– E
– n
__attribute__((max_global_work_dim(0))) __attribute__((autorun)) __attribute__((num_compute_units(NR_GRIDDERS))) __kernel void gridder() { int gridder = get_compute_id(0); float8 subgrid[NR_PIXELS]; for (unsigned short pixel = 0; pixel < NR_PIXELS; pixel ++) { | subgrid[pixel] = 0; } #pragma ivdep for (unsigned short vis_major = 0; vis_major < NR_VISIBILITIES; vis_major += UNROLL_FACTOR) { | float8 visibilities[UNROLL_FACTOR] __attribute__((register)); | | for (unsigned short vis_minor = 0; vis_minor < UNROLL_FACTOR; vis_minor++) { | | visibilities[vis_minor] = read_channel_intel(visibilities_channel[gridder]); | } | | for (unsigned short pixel = 0; pixel < NR_PIXELS; pixel++) { | | float8 pixel_value = subgrid[pixel]; | | float8 phasors = read_channel_intel(phasors_channel[gridder]); // { cos(phase), sin(phase) } | | | | #pragma unroll | | for (unsigned short vis_minor = 0; vis_minor < UNROLL_FACTOR; vis_minor++) { | | | pixel_value.even += phasors[vis_minor] * visibilities[vis_minor].even + -phasors[vis_minor] * visibilities[vis_minor].odd; | | | pixel_value.odd += phasors[vis_minor] * visibilities[vis_minor].odd + phasors[vis_minor] * visibilities[vis_minor].even; | | } | | | | subgrid[pixel] = pixel_value; | } } for (unsigned short pixel = 0; pixel < NR_PIXELS; pixel ++) { | write_channel_intel(pixel_channel[gridder], subgrid[pixel]); } }
– m
m a x
m a x
– r
– d
– f
– t
– l
– b
– F
– e
– m
– o
– F
– s
– d
– o