/*
 * FP_SQR(Z, X) -- PTX fragment computing the full, unreduced square of a
 * 6-limb (6 x 64-bit) unsigned integer.
 *
 * The macro expands to a sequence of adjacent string literals, i.e. a single
 * string of PTX statements. NOTE(review): presumably it is pasted into an
 * inline asm() statement by the caller -- confirm at the call sites.
 *
 * Operands
 *   Z, X  Register-name prefixes. They are stringified (#Z / #X) and a single
 *         hex digit is appended to form each register name:
 *           reads   X0 .. X5              (limb 0 = least significant)
 *           writes  Z0 .. Z9, Za, Zb      (12 limbs; a = 10, b = 11)
 *
 * Algorithm
 *   (1) Accumulate the 15 off-diagonal products X[i]*X[j], i < j, into
 *       Z1..Za, one anti-diagonal group at a time, highest column first.
 *   (2) Double that partial sum (Z1..Za += Z1..Za); the carry-out becomes Zb.
 *   (3) Add the 6 diagonal products X[i]*X[i] into Z0..Zb.
 *   No modular reduction is performed here.
 *
 * Constraints
 *   - Z registers must not alias X registers: Z5 is written by the very first
 *     instruction while X5 is still read by later ones.
 *   - The carry flag is clobbered (.cc / madc / addc are used throughout).
 *     Each carry chain starts with a non-"c" .cc instruction, so the incoming
 *     carry state is not consumed.
 *   - Every line of the definition must end with a backslash immediately
 *     followed by the newline; a blank line or trailing blank after the
 *     backslash terminates or breaks the macro.
 */
#define FP_SQR(Z, X) \
    /* (1a) X0*X5 seeds columns 5..6 */ \
    "\n\tmul.lo.u64     "#Z"5, "#X"0, "#X"5    ; mul.hi.u64     "#Z"6, "#X"0, "#X"5    ;" \
    /* (1b) X0*X4, X1*X5 -> columns 4..7 */ \
    "\n\tmul.lo.u64     "#Z"4, "#X"0, "#X"4    ; mad.hi.u64.cc  "#Z"5, "#X"0, "#X"4, "#Z"5;" \
    "\n\tmadc.lo.u64.cc "#Z"6, "#X"1, "#X"5, "#Z"6; madc.hi.u64    "#Z"7, "#X"1, "#X"5,  0;" \
    /* (1c) X0*X3, X1*X4, X2*X5 -> columns 3..8 */ \
    "\n\tmul.lo.u64     "#Z"3, "#X"0, "#X"3    ; mad.hi.u64.cc  "#Z"4, "#X"0, "#X"3, "#Z"4;" \
    "\n\tmadc.lo.u64.cc "#Z"5, "#X"1, "#X"4, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"1, "#X"4, "#Z"6;" \
    "\n\tmadc.lo.u64.cc "#Z"7, "#X"2, "#X"5, "#Z"7; madc.hi.u64    "#Z"8, "#X"2, "#X"5,  0;" \
    /* (1d) X0*X2, X1*X3, X2*X4, X3*X5 -> columns 2..9 */ \
    "\n\tmul.lo.u64     "#Z"2, "#X"0, "#X"2    ; mad.hi.u64.cc  "#Z"3, "#X"0, "#X"2, "#Z"3;" \
    "\n\tmadc.lo.u64.cc "#Z"4, "#X"1, "#X"3, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"1, "#X"3, "#Z"5;" \
    "\n\tmadc.lo.u64.cc "#Z"6, "#X"2, "#X"4, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"2, "#X"4, "#Z"7;" \
    "\n\tmadc.lo.u64.cc "#Z"8, "#X"3, "#X"5, "#Z"8; madc.hi.u64    "#Z"9, "#X"3, "#X"5,  0;" \
    /* (1e) X0*X1, X1*X2, X2*X3, X3*X4, X4*X5 -> columns 1..10 */ \
    "\n\tmul.lo.u64     "#Z"1, "#X"0, "#X"1    ; mad.hi.u64.cc  "#Z"2, "#X"0, "#X"1, "#Z"2;" \
    "\n\tmadc.lo.u64.cc "#Z"3, "#X"1, "#X"2, "#Z"3; madc.hi.u64.cc "#Z"4, "#X"1, "#X"2, "#Z"4;" \
    "\n\tmadc.lo.u64.cc "#Z"5, "#X"2, "#X"3, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"2, "#X"3, "#Z"6;" \
    "\n\tmadc.lo.u64.cc "#Z"7, "#X"3, "#X"4, "#Z"7; madc.hi.u64.cc "#Z"8, "#X"3, "#X"4, "#Z"8;" \
    "\n\tmadc.lo.u64.cc "#Z"9, "#X"4, "#X"5, "#Z"9; madc.hi.u64    "#Z"a, "#X"4, "#X"5,  0;" \
    /* (2) double the off-diagonal sum; the final carry lands in Zb */ \
    "\n\tadd.u64.cc  "#Z"1, "#Z"1, "#Z"1;" \
    "\n\taddc.u64.cc "#Z"2, "#Z"2, "#Z"2;" \
    "\n\taddc.u64.cc "#Z"3, "#Z"3, "#Z"3;" \
    "\n\taddc.u64.cc "#Z"4, "#Z"4, "#Z"4;" \
    "\n\taddc.u64.cc "#Z"5, "#Z"5, "#Z"5;" \
    "\n\taddc.u64.cc "#Z"6, "#Z"6, "#Z"6;" \
    "\n\taddc.u64.cc "#Z"7, "#Z"7, "#Z"7;" \
    "\n\taddc.u64.cc "#Z"8, "#Z"8, "#Z"8;" \
    "\n\taddc.u64.cc "#Z"9, "#Z"9, "#Z"9;" \
    "\n\taddc.u64.cc "#Z"a, "#Z"a, "#Z"a;" \
    "\n\taddc.u64    "#Z"b,  0,  0;" \
    /* (3) add the diagonal squares X[i]*X[i] into columns 2i and 2i+1 */ \
    "\n\tmul.lo.u64     "#Z"0, "#X"0, "#X"0    ; mad.hi.u64.cc  "#Z"1, "#X"0, "#X"0, "#Z"1;" \
    "\n\tmadc.lo.u64.cc "#Z"2, "#X"1, "#X"1, "#Z"2; madc.hi.u64.cc "#Z"3, "#X"1, "#X"1, "#Z"3;" \
    "\n\tmadc.lo.u64.cc "#Z"4, "#X"2, "#X"2, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"2, "#X"2, "#Z"5;" \
    "\n\tmadc.lo.u64.cc "#Z"6, "#X"3, "#X"3, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"3, "#X"3, "#Z"7;" \
    "\n\tmadc.lo.u64.cc "#Z"8, "#X"4, "#X"4, "#Z"8; madc.hi.u64.cc "#Z"9, "#X"4, "#X"4, "#Z"9;" \
    "\n\tmadc.lo.u64.cc "#Z"a, "#X"5, "#X"5, "#Z"a; madc.hi.u64    "#Z"b, "#X"5, "#X"5, "#Z"b;"