FK20 CUDA
fp_mul.cuh
Go to the documentation of this file.
1 #ifndef FP_MUL
2 
/**
 * FP_MUL(Z, X, Y) - assembly text for a 6-limb by 6-limb multiplication.
 *
 * The macro name doubles as the include guard: the enclosing #ifndef skips
 * this definition when FP_MUL is already defined.
 *
 * The expansion is a sequence of adjacent string literals holding 64-bit
 * multiply / multiply-add instructions (mul, mad, madc, addc). Each argument
 * is stringized and glued to a one-character limb suffix, so the text refers
 * to operands named X0..X5, Y0..Y5 and Z0..Z9, Za, Zb. The macro declares
 * none of these names; they must already exist where the text is used.
 *
 * Every partial product Xi*Yj contributes its low half to Z(i+j) and its
 * high half to Z(i+j+1), with carries moving towards higher limb indices,
 * so limb 0 is the least significant and Z0..Zb receive the full 12-limb
 * product. No reduction step appears in this macro.
 * NOTE(review): presumably a modular reduction is applied by the caller -
 * confirm against the call sites.
 *
 * Layout: one row per X limb, each row split into two passes. The first pass
 * uses the odd Y limbs (Y1, Y3, Y5), the second the even ones (Y0, Y2, Y4).
 * Within a pass consecutive lo/hi halves land in consecutive Z limbs, so each
 * pass is a single uninterrupted carry chain. Per the PTX ISA, the .cc suffix
 * writes the carry flag and madc/addc read it; the instruction order must
 * therefore not be changed. The carry flag is overwritten by this sequence.
 *
 * Row X0 starts with plain mul instructions for the odd pass, which
 * initialise Z1..Z6 without reading their previous contents.
 *
 * Z must not name the same operands as X or Y: Z limbs are written before
 * all X and Y limbs have been read. X and Y are only read.
 */
8 #define FP_MUL(Z, X, Y) \
9  "\n\tmul.lo.u64 "#Z"1, "#X"0, "#Y"1 ; mul.hi.u64 "#Z"2, "#X"0, "#Y"1 ;" \
10  "\n\tmul.lo.u64 "#Z"3, "#X"0, "#Y"3 ; mul.hi.u64 "#Z"4, "#X"0, "#Y"3 ;" \
11  "\n\tmul.lo.u64 "#Z"5, "#X"0, "#Y"5 ; mul.hi.u64 "#Z"6, "#X"0, "#Y"5 ;" \
12 /* Row X0, even Y limbs: Z0 is a plain low half; the chain then adds into Z1..Z5 and folds the last carry into Z6. */ \
13  "\n\tmul.lo.u64 "#Z"0, "#X"0, "#Y"0 ; mad.hi.u64.cc "#Z"1, "#X"0, "#Y"0, "#Z"1;" \
14  "\n\tmadc.lo.u64.cc "#Z"2, "#X"0, "#Y"2, "#Z"2; madc.hi.u64.cc "#Z"3, "#X"0, "#Y"2, "#Z"3;" \
15  "\n\tmadc.lo.u64.cc "#Z"4, "#X"0, "#Y"4, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"0, "#Y"4, "#Z"5;" \
16  "\n\taddc.u64 "#Z"6, 0, "#Z"6;" \
17 \
18 /* Row X1, odd Y limbs: chain runs Z2..Z6 and creates Z7 as hi(X1*Y5) + 0 + carry. */ \
19  "\n\tmad.lo.u64.cc "#Z"2, "#X"1, "#Y"1, "#Z"2; madc.hi.u64.cc "#Z"3, "#X"1, "#Y"1, "#Z"3;" \
20  "\n\tmadc.lo.u64.cc "#Z"4, "#X"1, "#Y"3, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"1, "#Y"3, "#Z"5;" \
21  "\n\tmadc.lo.u64.cc "#Z"6, "#X"1, "#Y"5, "#Z"6; madc.hi.u64 "#Z"7, "#X"1, "#Y"5, 0;" \
22 /* Row X1, even Y limbs: chain runs Z1..Z6; the trailing addc folds the last carry into Z7. */ \
23  "\n\tmad.lo.u64.cc "#Z"1, "#X"1, "#Y"0, "#Z"1; madc.hi.u64.cc "#Z"2, "#X"1, "#Y"0, "#Z"2;" \
24  "\n\tmadc.lo.u64.cc "#Z"3, "#X"1, "#Y"2, "#Z"3; madc.hi.u64.cc "#Z"4, "#X"1, "#Y"2, "#Z"4;" \
25  "\n\tmadc.lo.u64.cc "#Z"5, "#X"1, "#Y"4, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"1, "#Y"4, "#Z"6;" \
26  "\n\taddc.u64 "#Z"7, 0, "#Z"7;" \
27 \
28 /* Row X2, odd Y limbs: chain runs Z3..Z7 and creates Z8. */ \
29  "\n\tmad.lo.u64.cc "#Z"3, "#X"2, "#Y"1, "#Z"3; madc.hi.u64.cc "#Z"4, "#X"2, "#Y"1, "#Z"4;" \
30  "\n\tmadc.lo.u64.cc "#Z"5, "#X"2, "#Y"3, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"2, "#Y"3, "#Z"6;" \
31  "\n\tmadc.lo.u64.cc "#Z"7, "#X"2, "#Y"5, "#Z"7; madc.hi.u64 "#Z"8, "#X"2, "#Y"5, 0;" \
32 /* Row X2, even Y limbs: chain runs Z2..Z7; last carry goes into Z8. */ \
33  "\n\tmad.lo.u64.cc "#Z"2, "#X"2, "#Y"0, "#Z"2; madc.hi.u64.cc "#Z"3, "#X"2, "#Y"0, "#Z"3;" \
34  "\n\tmadc.lo.u64.cc "#Z"4, "#X"2, "#Y"2, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"2, "#Y"2, "#Z"5;" \
35  "\n\tmadc.lo.u64.cc "#Z"6, "#X"2, "#Y"4, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"2, "#Y"4, "#Z"7;" \
36  "\n\taddc.u64 "#Z"8, 0, "#Z"8;" \
37 \
38 /* Row X3, odd Y limbs: chain runs Z4..Z8 and creates Z9. */ \
39  "\n\tmad.lo.u64.cc "#Z"4, "#X"3, "#Y"1, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"3, "#Y"1, "#Z"5;" \
40  "\n\tmadc.lo.u64.cc "#Z"6, "#X"3, "#Y"3, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"3, "#Y"3, "#Z"7;" \
41  "\n\tmadc.lo.u64.cc "#Z"8, "#X"3, "#Y"5, "#Z"8; madc.hi.u64 "#Z"9, "#X"3, "#Y"5, 0;" \
42 /* Row X3, even Y limbs: chain runs Z3..Z8; last carry goes into Z9. */ \
43  "\n\tmad.lo.u64.cc "#Z"3, "#X"3, "#Y"0, "#Z"3; madc.hi.u64.cc "#Z"4, "#X"3, "#Y"0, "#Z"4;" \
44  "\n\tmadc.lo.u64.cc "#Z"5, "#X"3, "#Y"2, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"3, "#Y"2, "#Z"6;" \
45  "\n\tmadc.lo.u64.cc "#Z"7, "#X"3, "#Y"4, "#Z"7; madc.hi.u64.cc "#Z"8, "#X"3, "#Y"4, "#Z"8;" \
46  "\n\taddc.u64 "#Z"9, 0, "#Z"9;" \
47 \
48 /* Row X4, odd Y limbs: chain runs Z5..Z9 and creates Za. */ \
49  "\n\tmad.lo.u64.cc "#Z"5, "#X"4, "#Y"1, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"4, "#Y"1, "#Z"6;" \
50  "\n\tmadc.lo.u64.cc "#Z"7, "#X"4, "#Y"3, "#Z"7; madc.hi.u64.cc "#Z"8, "#X"4, "#Y"3, "#Z"8;" \
51  "\n\tmadc.lo.u64.cc "#Z"9, "#X"4, "#Y"5, "#Z"9; madc.hi.u64 "#Z"a, "#X"4, "#Y"5, 0;" \
52 /* Row X4, even Y limbs: chain runs Z4..Z9; last carry goes into Za. */ \
53  "\n\tmad.lo.u64.cc "#Z"4, "#X"4, "#Y"0, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"4, "#Y"0, "#Z"5;" \
54  "\n\tmadc.lo.u64.cc "#Z"6, "#X"4, "#Y"2, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"4, "#Y"2, "#Z"7;" \
55  "\n\tmadc.lo.u64.cc "#Z"8, "#X"4, "#Y"4, "#Z"8; madc.hi.u64.cc "#Z"9, "#X"4, "#Y"4, "#Z"9;" \
56  "\n\taddc.u64 "#Z"a, 0, "#Z"a;" \
57 \
58 /* Row X5, odd Y limbs: chain runs Z6..Za and creates Zb, the top limb. */ \
59  "\n\tmad.lo.u64.cc "#Z"6, "#X"5, "#Y"1, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"5, "#Y"1, "#Z"7;" \
60  "\n\tmadc.lo.u64.cc "#Z"8, "#X"5, "#Y"3, "#Z"8; madc.hi.u64.cc "#Z"9, "#X"5, "#Y"3, "#Z"9;" \
61  "\n\tmadc.lo.u64.cc "#Z"a, "#X"5, "#Y"5, "#Z"a; madc.hi.u64 "#Z"b, "#X"5, "#Y"5, 0;" \
62 /* Row X5, even Y limbs: chain runs Z5..Za; last carry goes into Zb. */ \
63  "\n\tmad.lo.u64.cc "#Z"5, "#X"5, "#Y"0, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"5, "#Y"0, "#Z"6;" \
64  "\n\tmadc.lo.u64.cc "#Z"7, "#X"5, "#Y"2, "#Z"7; madc.hi.u64.cc "#Z"8, "#X"5, "#Y"2, "#Z"8;" \
65  "\n\tmadc.lo.u64.cc "#Z"9, "#X"5, "#Y"4, "#Z"9; madc.hi.u64.cc "#Z"a, "#X"5, "#Y"4, "#Z"a;" \
66  "\n\taddc.u64 "#Z"b, 0, "#Z"b;"
67 
68 #endif /* FP_MUL */
69 // vim: ts=4 et sw=4 si