Memory layout Performance improvements
RICH Cherenkov angle status report March 2017
Christina Quast March 6, 2017
Christina Quast RICH Cherenkov angle status report March 2017
RICH Cherenkov angle status report March 2017 - Christina Quast, March 6, 2017 - PowerPoint PPT Presentation
Memory layout Performance improvements RICH Cherenkov angle status report March 2017 Christina Quast March 6, 2017 Christina Quast RICH Cherenkov angle status report March 2017 Memory layout Performance improvements Nanoseconds per photon
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
+++ b/ QuarticSolverCacheline .h
+ T reflPointX __attribute__ (( __aligned__ (64))); + T reflPointY __attribute__ (( __aligned__ (64))); + T reflPointZ __attribute__ (( __aligned__ (64))); reflPointX = ex + CoCX; reflPointY = ey + CoCY; @@ // TODO :align 64 // FIXME: ueberall const dranmachen ?
emissionPointVecX ; + VECT emissionPointVecX __attribute__ (( __aligned__ (64))); emissionPointVecX .load_a (& data.emissPnt.x()[0]);
emissionPointVecY ; + VECT emissionPointVecY __attribute__ (( __aligned__ (64))); emissionPointVecY .load_a (& data.emissPnt.y()[0]);
emissionPointVecZ ; + VECT emissionPointVecZ __attribute__ (( __aligned__ (64))); emissionPointVecZ .load_a (& data.emissPnt.z()[0]);
CoCX; + VECT CoCX __attribute__ (( __aligned__ (64))); CoCX.load_a (& data.centOfCurv .x()[0]);
CoCY; + VECT CoCY __attribute__ (( __aligned__ (64))); CoCY.load_a (& data.centOfCurv .y()[0]);
CoCZ; Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements + VECT CoCZ __attribute__ (( __aligned__ (64))); CoCZ.load_a (& data.centOfCurv .z()[0]); @ VECT e2 = evecX*evecX + evecY*evecY + evecZ*evecZ; // vector from mirror centre
of curvature to virtual detection point
virtDetPointVecX ; + VECT virtDetPointVecX __attribute__ (( __aligned__ (64))); virtDetPointVecX .load_a (& data. virtDetPoint .x()[0]);
virtDetPointVecY ; + VECT virtDetPointVecY __attribute__ (( __aligned__ (64))); virtDetPointVecY .load_a (& data. virtDetPoint .y()[0]);
virtDetPointVecZ ; + VECT virtDetPointVecZ __attribute__ (( __aligned__ (64))); virtDetPointVecZ .load_a (& data. virtDetPoint .z()[0]); // const Vector dvec( virtDetPoint
@@
namespace RichCacheline
radius; + VECT radius __attribute__ (( __aligned__ (64))); radius.load_a (& data.radius [0]);
reflPointX ;
reflPointY ;
reflPointZ ; + VECT reflPointX __attribute__ (( __aligned__ (64))); + VECT reflPointY __attribute__ (( __aligned__ (64))); + VECT reflPointZ __attribute__ (( __aligned__ (64)));
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements +++ b/main.cpp @@
main ( int argc , char ** argv)
+ VECTYPE :: PhotonReflections <float > dataV0_vect __attribute__ (( __aligned__ (64))); + VECTYPE :: PhotonReflections <float > dataV1_vect __attribute__ (( __aligned__ (64))); diff
index 75 c05bf ..72 db553 100644
+++ b/vectype.h template <typename T, std :: size_t DIM = 16>
PhotonReflections = std :: vector <PhotonReflection <T, DIM >>; + using PhotonReflections = std :: vector <PhotonReflection <T, DIM >, aligned_alloca Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
+++ b/ QuarticSolverCacheline .h @@
namespace RichCacheline
+ const T divnorm = approx_recipr (norm ); + const T norm_sqrt = approx_recipr ( approx_rsqrt (norm )); nx *= divnorm; ny *= divnorm; nz *= divnorm; @@
auto enorm = radius/e; + const auto enorm = radius* approx_recipr (e); @@
cosgamma2 = (evecDvec * evecDvec )/ ed2; + VECT cosgamma2 = (evecDvec * evecDvec) * approx_recipr (ed2 );
VECT e = sqrt(e2);
VECT d = sqrt(d2); + const VECT e = approx_recipr ( approx_rsqrt (e2 )); + const VECT d = approx_recipr ( approx_rsqrt (d2 ));
VECT singamma = sqrt (1.0f - cosgamma2 ));
VECT cosgamma = approx_recipr ( approx_rsqrt (cosgamma2 )); + const VECT singamma = approx_recipr ( approx_rsqrt (1.0f - cosgamma2 )); + const VECT cosgamma = approx_recipr ( approx_rsqrt (cosgamma2 )); @@ const VECT maxval = std :: numeric_limits <SKALART >:: max ();
VECT inv_a0 = ((a0 > 0)? 1.0f/a0: maxval ); + const VECT inv_a0 = ((a0 > 0)? approx_recipr (a0): maxval ); @@
auto toberooted = (abs(R) + sqrt(abs(R2 -Q3)) ); + const auto toberooted = (abs(R) + approx_recipr ( approx_rsqrt (abs(R2 -Q3 )))); Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements // FIXME: oder zuerst in normales array , dann load? // FIXME: also for double? @@ const auto A = sgnR * rooted; PR(A);
auto B = Q / A; + const auto B = Q * approx_recipr (A);
auto u1 =
+ const auto u1 =
// FIXME: saturated
// const const auto u2 = UU * abs_saturated (A-B); const auto u2 = UU * abs(A-B);
auto V = sqrt(u1*u1 + u2*u2); + const auto V = approx_recipr ( approx_rsqrt (u1*u1 + u2*u2 )); // std :: complex <TYPE > w3 = ( abs_saturated (V) != 0.0 ? (TYPE )( qq *
// std :: complex <TYPE >(0 ,0) ); // FIXME: why abs saturated when compared to 0.0 ??
auto w3r = ((V != 0.0)? (qq *
+ const auto w3r = ((V != 0.0)? (qq *
// TYPE res = std :: real(w1) + std :: real(w2) + std :: real(w3) - (r4*a);
auto res = sqrt ((u1+V)*2) + w3r - (r4*a); + const auto res = approx_recipr ( approx_rsqrt ((u1+V)*2)) + w3r - (r4*a); // return the final result // FIXME: std :: move ? const auto r = (( res > 1.0)? 1.0: (( res <
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
+++ b/ QuarticSolverCacheline .h @@
namespace RichCacheline { + builtin_prefetch (&(((& data )+0)-> radius [0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> emissPnt.x())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> emissPnt.y())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> emissPnt.z())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> centOfCurv.x())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> centOfCurv.y())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> centOfCurv.z())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> virtDetPoint .x())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> virtDetPoint .y())[0]) , 0, 3); + builtin_prefetch (&(((& data +1)-> virtDetPoint .z())[0]) , 0, 3); VECT emissionPointVecX __attribute__ (( __aligned__ (64))); emissionPointVecX .load_a (& data.emissPnt.x()[0]); VECT emissionPointVecY __attribute__ (( __aligned__ (64))); @@ + __builtin_prefetch (& data. sphReflPoint .x()[0] , 1, 0); + __builtin_prefetch (& data. sphReflPoint .y()[0] , 1, 0); + __builtin_prefetch (& data. sphReflPoint .z()[0] , 1, 0); reflPointX .store_a (& data. sphReflPoint .x()[0]); reflPointY .store_a (& data. sphReflPoint .y()[0]); Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
+++ b/ QuarticSolverCacheline .h @@
namespace RichCacheline const T norm = (nx*nx+ny*ny+nz*nz);
const T norm_sqrt = approx_recipr ( approx_rsqrt (norm ));
divnorm;
divnorm;
divnorm;
beta = asin(sinbeta );
auto beta = asin(sinbeta );
auto beta = T(asin(sinbeta.get_low ()), asin(sinbeta.get_high ()));
auto a = sinbeta*norm_sqrt;
auto b = (1.0f-cos(beta ))*( norm );
auto enorm = radius* approx_recipr (e); + const auto b = (1.0f- approx_recipr ( approx_rsqrt (1.0f-( sinbeta*sinbeta )))); + const auto enorm = radius* approx_recipr (e*norm );
+ const std ::array <T, 9> M = {norm+b*(-nz*nz -ny*ny), a*nz+b*nx*ny , -a*ny+b*nx*nz , +
+ a*ny+b*nx*nz , -a*nx+b*ny*nz , norm+b*(-ny*ny -nx*nx )}; Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Kaon ID Efficiency / % 80 85 90 95 100 Pion MisID Efficiency / % 1 10
RICH Kaon ID
1.16162 9.24242 Old Quartic RichDLLk-RichDLLpi > cut Long tracks | 3<P(GeV)<100 | 0.5<Pt(GeV)<100 | 30<TkAng(mrad)<300 Required Dets : AnyRICH 13087 Kaons in Acceptance
0.151515 7.72727 New NR Quartic RichDLLk-RichDLLpi > cut Long tracks | 3<P(GeV)<100 | 0.5<Pt(GeV)<100 | 30<TkAng(mrad)<300 Required Dets : AnyRICH 13087 Kaons in Acceptance
RICH Kaon ID
Christina Quast RICH Cherenkov angle status report March 2017
Memory layout Performance improvements
Christina Quast RICH Cherenkov angle status report March 2017