/*
 * FP_SQR(Z, X) -- PTX text for squaring a 6-limb (6 x 64-bit) integer into
 * a 12-limb (12 x 64-bit) result.
 *
 * The macro expands to a sequence of adjacent string literals (one PTX line
 * each, every line starting with "\n\t").  Register names are built by
 * stringifying the macro argument and appending a limb suffix:
 *
 *   input : X0 .. X5             (X0 is the least significant limb)
 *   output: Z0 .. Z9, Za, Zb     (Z0 is the least significant limb)
 *
 * The macro only emits instructions; it does not declare any register.
 *
 * Algorithm (schoolbook squaring, using x^2 = 2*sum_{i<j} xi*xj + sum xi^2):
 *   1. accumulate the 15 cross products Xi*Xj (i < j) into Z1 .. Za;
 *   2. double Z1 .. Za in place, the carry-out becomes Zb;
 *   3. add the six squares Xi*Xi at limbs 2i / 2i+1 (this also writes Z0).
 *
 * Constraints that follow from the instruction sequence:
 *   - Z and X must name distinct register sets: Z5 is written by the very
 *     first instruction while X5 is still read by later ones.
 *   - The carry flag is live between consecutive .cc / madc / addc
 *     instructions, so the emitted lines must not be reordered and nothing
 *     that touches the carry flag may be inserted between them.
 *   - Use only block comments inside the macro body: a line comment would
 *     swallow the spliced continuation lines.
 */
#define FP_SQR(Z, X) \
/* Phase 1: cross products Xi*Xj, i < j.  Each row below is one carry    */ \
/* chain; the last instruction of a row (no .cc) opens the next limb.    */ \
/* X0*X5 -> Z5:Z6 (plain stores, nothing to accumulate yet)              */ \
"\n\tmul.lo.u64 "#Z"5, "#X"0, "#X"5 ; mul.hi.u64 "#Z"6, "#X"0, "#X"5 ;" \
/* X0*X4 at limb 4, X1*X5 at limb 6; opens Z7                            */ \
"\n\tmul.lo.u64 "#Z"4, "#X"0, "#X"4 ; mad.hi.u64.cc "#Z"5, "#X"0, "#X"4, "#Z"5;" \
"\n\tmadc.lo.u64.cc "#Z"6, "#X"1, "#X"5, "#Z"6; madc.hi.u64 "#Z"7, "#X"1, "#X"5, 0;" \
/* X0*X3 at limb 3, X1*X4 at limb 5, X2*X5 at limb 7; opens Z8           */ \
"\n\tmul.lo.u64 "#Z"3, "#X"0, "#X"3 ; mad.hi.u64.cc "#Z"4, "#X"0, "#X"3, "#Z"4;" \
"\n\tmadc.lo.u64.cc "#Z"5, "#X"1, "#X"4, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"1, "#X"4, "#Z"6;" \
"\n\tmadc.lo.u64.cc "#Z"7, "#X"2, "#X"5, "#Z"7; madc.hi.u64 "#Z"8, "#X"2, "#X"5, 0;" \
/* X0*X2, X1*X3, X2*X4, X3*X5 at limbs 2, 4, 6, 8; opens Z9              */ \
"\n\tmul.lo.u64 "#Z"2, "#X"0, "#X"2 ; mad.hi.u64.cc "#Z"3, "#X"0, "#X"2, "#Z"3;" \
"\n\tmadc.lo.u64.cc "#Z"4, "#X"1, "#X"3, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"1, "#X"3, "#Z"5;" \
"\n\tmadc.lo.u64.cc "#Z"6, "#X"2, "#X"4, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"2, "#X"4, "#Z"7;" \
"\n\tmadc.lo.u64.cc "#Z"8, "#X"3, "#X"5, "#Z"8; madc.hi.u64 "#Z"9, "#X"3, "#X"5, 0;" \
/* X0*X1, X1*X2, X2*X3, X3*X4, X4*X5 at limbs 1, 3, 5, 7, 9; opens Za    */ \
"\n\tmul.lo.u64 "#Z"1, "#X"0, "#X"1 ; mad.hi.u64.cc "#Z"2, "#X"0, "#X"1, "#Z"2;" \
"\n\tmadc.lo.u64.cc "#Z"3, "#X"1, "#X"2, "#Z"3; madc.hi.u64.cc "#Z"4, "#X"1, "#X"2, "#Z"4;" \
"\n\tmadc.lo.u64.cc "#Z"5, "#X"2, "#X"3, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"2, "#X"3, "#Z"6;" \
"\n\tmadc.lo.u64.cc "#Z"7, "#X"3, "#X"4, "#Z"7; madc.hi.u64.cc "#Z"8, "#X"3, "#X"4, "#Z"8;" \
"\n\tmadc.lo.u64.cc "#Z"9, "#X"4, "#X"5, "#Z"9; madc.hi.u64 "#Z"a, "#X"4, "#X"5, 0;" \
/* Phase 2: double the cross-product sum (Zi += Zi with carry) for       */ \
/* Z1 .. Za; the final carry-out is materialised as Zb = 0 + 0 + carry.  */ \
"\n\tadd.u64.cc "#Z"1, "#Z"1, "#Z"1;" \
"\n\taddc.u64.cc "#Z"2, "#Z"2, "#Z"2;" \
"\n\taddc.u64.cc "#Z"3, "#Z"3, "#Z"3;" \
"\n\taddc.u64.cc "#Z"4, "#Z"4, "#Z"4;" \
"\n\taddc.u64.cc "#Z"5, "#Z"5, "#Z"5;" \
"\n\taddc.u64.cc "#Z"6, "#Z"6, "#Z"6;" \
"\n\taddc.u64.cc "#Z"7, "#Z"7, "#Z"7;" \
"\n\taddc.u64.cc "#Z"8, "#Z"8, "#Z"8;" \
"\n\taddc.u64.cc "#Z"9, "#Z"9, "#Z"9;" \
"\n\taddc.u64.cc "#Z"a, "#Z"a, "#Z"a;" \
"\n\taddc.u64 "#Z"b, 0, 0;" \
/* Phase 3: add the squares Xi*Xi (low half at limb 2i, high half at     */ \
/* limb 2i+1) as one carry chain.  Z0 is written here for the first      */ \
/* time; the final instruction folds the last carry into Zb.             */ \
"\n\tmul.lo.u64 "#Z"0, "#X"0, "#X"0 ; mad.hi.u64.cc "#Z"1, "#X"0, "#X"0, "#Z"1;" \
"\n\tmadc.lo.u64.cc "#Z"2, "#X"1, "#X"1, "#Z"2; madc.hi.u64.cc "#Z"3, "#X"1, "#X"1, "#Z"3;" \
"\n\tmadc.lo.u64.cc "#Z"4, "#X"2, "#X"2, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"2, "#X"2, "#Z"5;" \
"\n\tmadc.lo.u64.cc "#Z"6, "#X"3, "#X"3, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"3, "#X"3, "#Z"7;" \
"\n\tmadc.lo.u64.cc "#Z"8, "#X"4, "#X"4, "#Z"8; madc.hi.u64.cc "#Z"9, "#X"4, "#X"4, "#Z"9;" \
"\n\tmadc.lo.u64.cc "#Z"a, "#X"5, "#X"5, "#Z"a; madc.hi.u64 "#Z"b, "#X"5, "#X"5, "#Z"b;"