SLIDE 56 NVIDIA Volta – machine code example (***)
https://github.com/pkaski/motif-localized
/*
 * NVIDIA SASS listing: sixteen LOP3.LUT instructions followed by one
 * predicated branch. Each instruction is followed by two hex comments,
 * which appear to be the two 64-bit words of its encoding as printed by
 * the disassembler.
 *
 * LOP3.LUT Rd, Ra, Rb, Rc, lut, !PT
 *   Three-input bitwise logic op selected by the 8-bit truth table `lut`.
 *   NOTE(review): the formulas in the per-line comments assume the same
 *   table convention as PTX `lop3` (a = 0xF0, b = 0xCC, c = 0xAA) --
 *   confirm against the toolchain documentation. Under that convention:
 *     0x96 -> a ^ b ^ c
 *     0x3c -> a ^ b          (c does not affect the result; RZ is passed)
 *     0x78 -> a ^ (b & c)
 *   The `.reuse` operand suffix and the trailing `!PT` operand are presumed
 *   to be an operand-reuse hint and a constant predicate respectively;
 *   neither changes the formulas given here.
 *
 * Instruction order is kept exactly as in the original listing: later
 * instructions consume registers written by earlier ones (e.g. R8, R5,
 * R7, R64), so the sequence must not be reordered.
 */
        LOP3.LUT R8, R6, R8, R19, 0x96, !PT;                /* 0x0000000806087212 */ /* 0x000fe400078e9613 */  /* R8  = R6  ^ R8  ^ R19 */
        LOP3.LUT R64, R11, R64, RZ, 0x3c, !PT;              /* 0x000000400b407212 */ /* 0x000fc400078e3cff */  /* R64 = R11 ^ R64 */
        LOP3.LUT R62, R62, R5, R4.reuse, 0x96, !PT;         /* 0x000000053e3e7212 */ /* 0x100fe400078e9604 */  /* R62 = R62 ^ R5  ^ R4 */
        LOP3.LUT R17, R17, R15.reuse, R7.reuse, 0x78, !PT;  /* 0x0000000f11117212 */ /* 0x180fe400078e7807 */  /* R17 = R17 ^ (R15 & R7) */
        LOP3.LUT R8, R8, R15, R7, 0x78, !PT;                /* 0x0000000f08087212 */ /* 0x000fe400078e7807 */  /* R8  = R8  ^ (R15 & R7) */
        LOP3.LUT R18, R19.reuse, R18, R4.reuse, 0x96, !PT;  /* 0x0000001213127212 */ /* 0x140fe400078e9604 */  /* R18 = R19 ^ R18 ^ R4 */
        LOP3.LUT R5, R19, R10, R4, 0x96, !PT;               /* 0x0000000a13057212 */ /* 0x000fe400078e9604 */  /* R5  = R19 ^ R10 ^ R4 */
        LOP3.LUT R9, R6, R9, R19, 0x96, !PT;                /* 0x0000000906097212 */ /* 0x000fc400078e9613 */  /* R9  = R6  ^ R9  ^ R19 */
        LOP3.LUT R7, R64, R15, R7, 0x78, !PT;               /* 0x0000000f40077212 */ /* 0x000fe400078e7807 */  /* R7  = R64 ^ (R15 & R7); R7 is overwritten here, after its last use as a mask above */
        LOP3.LUT R61, R61, R12, R19, 0x96, !PT;             /* 0x0000000c3d3d7212 */ /* 0x000fe400078e9613 */  /* R61 = R61 ^ R12 ^ R19 */
        LOP3.LUT R59, R17, R59, RZ, 0x3c, !PT;              /* 0x0000003b113b7212 */ /* 0x000fe400078e3cff */  /* R59 = R17 ^ R59 */
        LOP3.LUT R60, R60, R5, R6, 0x96, !PT;               /* 0x000000053c3c7212 */ /* 0x000fe400078e9606 */  /* R60 = R60 ^ R5  ^ R6 */
        LOP3.LUT R58, R9, R58, RZ, 0x3c, !PT;               /* 0x0000003a093a7212 */ /* 0x000fe400078e3cff */  /* R58 = R9  ^ R58 */
        LOP3.LUT R51, R18, R51, RZ, 0x3c, !PT;              /* 0x0000003312337212 */ /* 0x000fc400078e3cff */  /* R51 = R18 ^ R51 */
        LOP3.LUT R50, R8, R50, RZ, 0x3c, !PT;               /* 0x0000003208327212 */ /* 0x000fe400078e3cff */  /* R50 = R8  ^ R50 */
        LOP3.LUT R57, R7, R57, RZ, 0x3c, !PT;               /* 0x0000003907397212 */ /* 0x000fe200078e3cff */  /* R57 = R7  ^ R57 */
        /* Branch to address 0x8d0 when predicate P0 is set; presumably the loop back-edge -- the target and the code that sets P0 are outside this listing. */
        @P0 BRA 0x8d0;                                      /* 0xfffff96000000947 */ /* 0x000fee000383ffff */
Example: a part of the inner loop of a parallelised GF(2^8) multilinear sieving algorithm, implemented in NVIDIA GV100 GPU machine code (Compute Capability 7.0)