/*
 * FP_REDUCE12(Z)
 *
 * Expands to a run of adjacent string literals containing PTX instructions,
 * meant to be pasted into a larger inline-asm string.
 *
 * Z is stringized and followed by one hex digit, so the fragment refers to
 * twelve 64-bit operands written Z0 .. Zb below (Z0 is the least significant
 * limb: the final borrow chain runs from Z0 up to Z5).
 *
 *   reads    : Z4 .. Zb (stage 1), Z0 .. Z5 (stage 3)
 *   writes   : Z0 .. Z5 (updated in place); Z6 .. Zb are left untouched
 *   scratch  : q0 .. q7 and r0 .. r6 -- not declared here, so the enclosing
 *              asm text has to provide them as 64-bit registers
 *   flags    : every stage relies on the carry flag travelling through
 *              consecutive .cc / madc / addc / subc instructions, so the
 *              instruction order inside each chain must not be changed
 *
 * Stage 1  q = partial product of the 7-word constant c6..c0 with Z, keeping
 *          only the terms that land in limb positions 11 .. 18 of the full
 *          product: lo(c_i * Z_j) is added to q[i+j-11], hi(c_i * Z_j) to
 *          q[i+j-10]. Lower positions are dropped, so q0 (position 11) only
 *          serves to feed carries into q1.
 * Stage 2  r0 .. r6 = low seven limbs of (q7..q1) * (6-word constant p5..p0);
 *          terms beyond limb 6 are omitted and carries out of r6 are dropped.
 * Stage 3  Z5..Z0 -= r5..r0, with the final borrow discarded. r6 is not
 *          consumed by this fragment.
 *
 * NOTE(review): the stage-2 constant matches the BLS12-381 base-field modulus
 * p and the stage-1 constant looks like floor(2^768 / p), which would make
 * this a Barrett-style reduction of a 768-bit value -- confirm, and confirm
 * whether the caller follows up with a final conditional subtraction.
 */
#define FP_REDUCE12(Z) \
    /* ---- Stage 1: q += c_i * Z, limb positions 11..18 ---- */ \
    /* c0 = 0x13E207F56591BA2E: initialises q0 and q1 */ \
    "\n\tmul.hi.u64     q0, 0x13E207F56591BA2EU, "#Z"a;" \
    "\n\tmad.lo.u64.cc  q0, 0x13E207F56591BA2EU, "#Z"b, q0;" \
    "\n\tmadc.hi.u64    q1, 0x13E207F56591BA2EU, "#Z"b,  0;" \
    /* c1 = 0x997167A058F1C07B: first chain initialises q2, second chain */ \
    /* folds its final carry into q2 */ \
    "\n\tmad.hi.u64.cc  q0, 0x997167A058F1C07BU, "#Z"9, q0;" \
    "\n\tmadc.lo.u64.cc q1, 0x997167A058F1C07BU, "#Z"b, q1;" \
    "\n\tmadc.hi.u64    q2, 0x997167A058F1C07BU, "#Z"b,  0;" \
    "\n\tmad.lo.u64.cc  q0, 0x997167A058F1C07BU, "#Z"a, q0;" \
    "\n\tmadc.hi.u64.cc q1, 0x997167A058F1C07BU, "#Z"a, q1;" \
    "\n\taddc.u64       q2, q2, 0;" \
    /* c2 = 0xDF4771E0286779D3: initialises q3 */ \
    "\n\tmad.lo.u64.cc  q0, 0xDF4771E0286779D3U, "#Z"9, q0;" \
    "\n\tmadc.hi.u64.cc q1, 0xDF4771E0286779D3U, "#Z"9, q1;" \
    "\n\tmadc.lo.u64.cc q2, 0xDF4771E0286779D3U, "#Z"b, q2;" \
    "\n\tmadc.hi.u64    q3, 0xDF4771E0286779D3U, "#Z"b,  0;" \
    "\n\tmad.hi.u64.cc  q0, 0xDF4771E0286779D3U, "#Z"8, q0;" \
    "\n\tmadc.lo.u64.cc q1, 0xDF4771E0286779D3U, "#Z"a, q1;" \
    "\n\tmadc.hi.u64.cc q2, 0xDF4771E0286779D3U, "#Z"a, q2;" \
    "\n\taddc.u64       q3, q3, 0;" \
    /* c3 = 0x1B82741FF6A0A94B: initialises q4 */ \
    "\n\tmad.hi.u64.cc  q0, 0x1B82741FF6A0A94BU, "#Z"7, q0;" \
    "\n\tmadc.lo.u64.cc q1, 0x1B82741FF6A0A94BU, "#Z"9, q1;" \
    "\n\tmadc.hi.u64.cc q2, 0x1B82741FF6A0A94BU, "#Z"9, q2;" \
    "\n\tmadc.lo.u64.cc q3, 0x1B82741FF6A0A94BU, "#Z"b, q3;" \
    "\n\tmadc.hi.u64    q4, 0x1B82741FF6A0A94BU, "#Z"b,  0;" \
    "\n\tmad.lo.u64.cc  q0, 0x1B82741FF6A0A94BU, "#Z"8, q0;" \
    "\n\tmadc.hi.u64.cc q1, 0x1B82741FF6A0A94BU, "#Z"8, q1;" \
    "\n\tmadc.lo.u64.cc q2, 0x1B82741FF6A0A94BU, "#Z"a, q2;" \
    "\n\tmadc.hi.u64.cc q3, 0x1B82741FF6A0A94BU, "#Z"a, q3;" \
    "\n\taddc.u64       q4, q4, 0;" \
    /* c4 = 0x28101B0CC7A6BA29: initialises q5 */ \
    "\n\tmad.lo.u64.cc  q0, 0x28101B0CC7A6BA29U, "#Z"7, q0;" \
    "\n\tmadc.hi.u64.cc q1, 0x28101B0CC7A6BA29U, "#Z"7, q1;" \
    "\n\tmadc.lo.u64.cc q2, 0x28101B0CC7A6BA29U, "#Z"9, q2;" \
    "\n\tmadc.hi.u64.cc q3, 0x28101B0CC7A6BA29U, "#Z"9, q3;" \
    "\n\tmadc.lo.u64.cc q4, 0x28101B0CC7A6BA29U, "#Z"b, q4;" \
    "\n\tmadc.hi.u64    q5, 0x28101B0CC7A6BA29U, "#Z"b,  0;" \
    "\n\tmad.hi.u64.cc  q0, 0x28101B0CC7A6BA29U, "#Z"6, q0;" \
    "\n\tmadc.lo.u64.cc q1, 0x28101B0CC7A6BA29U, "#Z"8, q1;" \
    "\n\tmadc.hi.u64.cc q2, 0x28101B0CC7A6BA29U, "#Z"8, q2;" \
    "\n\tmadc.lo.u64.cc q3, 0x28101B0CC7A6BA29U, "#Z"a, q3;" \
    "\n\tmadc.hi.u64.cc q4, 0x28101B0CC7A6BA29U, "#Z"a, q4;" \
    "\n\taddc.u64       q5, q5, 0;" \
    /* c5 = 0xD835D2F3CC9E45CE: initialises q6 */ \
    "\n\tmad.hi.u64.cc  q0, 0xD835D2F3CC9E45CEU, "#Z"5, q0;" \
    "\n\tmadc.lo.u64.cc q1, 0xD835D2F3CC9E45CEU, "#Z"7, q1;" \
    "\n\tmadc.hi.u64.cc q2, 0xD835D2F3CC9E45CEU, "#Z"7, q2;" \
    "\n\tmadc.lo.u64.cc q3, 0xD835D2F3CC9E45CEU, "#Z"9, q3;" \
    "\n\tmadc.hi.u64.cc q4, 0xD835D2F3CC9E45CEU, "#Z"9, q4;" \
    "\n\tmadc.lo.u64.cc q5, 0xD835D2F3CC9E45CEU, "#Z"b, q5;" \
    "\n\tmadc.hi.u64    q6, 0xD835D2F3CC9E45CEU, "#Z"b,  0;" \
    "\n\tmad.lo.u64.cc  q0, 0xD835D2F3CC9E45CEU, "#Z"6, q0;" \
    "\n\tmadc.hi.u64.cc q1, 0xD835D2F3CC9E45CEU, "#Z"6, q1;" \
    "\n\tmadc.lo.u64.cc q2, 0xD835D2F3CC9E45CEU, "#Z"8, q2;" \
    "\n\tmadc.hi.u64.cc q3, 0xD835D2F3CC9E45CEU, "#Z"8, q3;" \
    "\n\tmadc.lo.u64.cc q4, 0xD835D2F3CC9E45CEU, "#Z"a, q4;" \
    "\n\tmadc.hi.u64.cc q5, 0xD835D2F3CC9E45CEU, "#Z"a, q5;" \
    "\n\taddc.u64       q6, q6, 0;" \
    /* c6 = 0x9: initialises q7 */ \
    "\n\tmad.lo.u64.cc  q0, 0x0000000000000009U, "#Z"5, q0;" \
    "\n\tmadc.hi.u64.cc q1, 0x0000000000000009U, "#Z"5, q1;" \
    "\n\tmadc.lo.u64.cc q2, 0x0000000000000009U, "#Z"7, q2;" \
    "\n\tmadc.hi.u64.cc q3, 0x0000000000000009U, "#Z"7, q3;" \
    "\n\tmadc.lo.u64.cc q4, 0x0000000000000009U, "#Z"9, q4;" \
    "\n\tmadc.hi.u64.cc q5, 0x0000000000000009U, "#Z"9, q5;" \
    "\n\tmadc.lo.u64.cc q6, 0x0000000000000009U, "#Z"b, q6;" \
    "\n\tmadc.hi.u64    q7, 0x0000000000000009U, "#Z"b,  0;" \
    "\n\tmad.hi.u64.cc  q0, 0x0000000000000009U, "#Z"4, q0;" \
    "\n\tmadc.lo.u64.cc q1, 0x0000000000000009U, "#Z"6, q1;" \
    "\n\tmadc.hi.u64.cc q2, 0x0000000000000009U, "#Z"6, q2;" \
    "\n\tmadc.lo.u64.cc q3, 0x0000000000000009U, "#Z"8, q3;" \
    "\n\tmadc.hi.u64.cc q4, 0x0000000000000009U, "#Z"8, q4;" \
    "\n\tmadc.lo.u64.cc q5, 0x0000000000000009U, "#Z"a, q5;" \
    "\n\tmadc.hi.u64.cc q6, 0x0000000000000009U, "#Z"a, q6;" \
    "\n\taddc.u64       q7, q7, 0;" \
    /* ---- Stage 2: r6..r0 = low 7 limbs of (q7..q1) * (p5..p0) ---- */ \
    /* The groups run from p5 down to p0; each group initialises one more */ \
    /* low limb of r with mul.lo and accumulates into the limbs above it. */ \
    /* p5 = 0x1A0111EA397FE69A: initialises r5 and r6 */ \
    "\n\tmul.lo.u64     r5, 0x1A0111EA397FE69AU, q1    ;" \
    "\n\tmul.hi.u64     r6, 0x1A0111EA397FE69AU, q1    ;" \
    "\n\tmad.lo.u64     r6, 0x1A0111EA397FE69AU, q2, r6;" \
    /* p4 = 0x4B1BA7B6434BACD7: initialises r4 */ \
    "\n\tmul.lo.u64     r4, 0x4B1BA7B6434BACD7U, q1    ;" \
    "\n\tmad.hi.u64.cc  r5, 0x4B1BA7B6434BACD7U, q1, r5;" \
    "\n\tmadc.lo.u64    r6, 0x4B1BA7B6434BACD7U, q3, r6;" \
    "\n\tmad.lo.u64.cc  r5, 0x4B1BA7B6434BACD7U, q2, r5;" \
    "\n\tmadc.hi.u64    r6, 0x4B1BA7B6434BACD7U, q2, r6;" \
    /* p3 = 0x64774B84F38512BF: initialises r3 */ \
    "\n\tmul.lo.u64     r3, 0x64774B84F38512BFU, q1    ;" \
    "\n\tmad.hi.u64.cc  r4, 0x64774B84F38512BFU, q1, r4;" \
    "\n\tmadc.lo.u64.cc r5, 0x64774B84F38512BFU, q3, r5;" \
    "\n\tmadc.hi.u64    r6, 0x64774B84F38512BFU, q3, r6;" \
    "\n\tmad.lo.u64.cc  r4, 0x64774B84F38512BFU, q2, r4;" \
    "\n\tmadc.hi.u64.cc r5, 0x64774B84F38512BFU, q2, r5;" \
    "\n\tmadc.lo.u64    r6, 0x64774B84F38512BFU, q4, r6;" \
    /* p2 = 0x6730D2A0F6B0F624: initialises r2 */ \
    "\n\tmul.lo.u64     r2, 0x6730D2A0F6B0F624U, q1    ;" \
    "\n\tmad.hi.u64.cc  r3, 0x6730D2A0F6B0F624U, q1, r3;" \
    "\n\tmadc.lo.u64.cc r4, 0x6730D2A0F6B0F624U, q3, r4;" \
    "\n\tmadc.hi.u64.cc r5, 0x6730D2A0F6B0F624U, q3, r5;" \
    "\n\tmadc.lo.u64    r6, 0x6730D2A0F6B0F624U, q5, r6;" \
    "\n\tmad.lo.u64.cc  r3, 0x6730D2A0F6B0F624U, q2, r3;" \
    "\n\tmadc.hi.u64.cc r4, 0x6730D2A0F6B0F624U, q2, r4;" \
    "\n\tmadc.lo.u64.cc r5, 0x6730D2A0F6B0F624U, q4, r5;" \
    "\n\tmadc.hi.u64    r6, 0x6730D2A0F6B0F624U, q4, r6;" \
    /* p1 = 0x1EABFFFEB153FFFF: initialises r1 */ \
    "\n\tmul.lo.u64     r1, 0x1EABFFFEB153FFFFU, q1    ;" \
    "\n\tmad.hi.u64.cc  r2, 0x1EABFFFEB153FFFFU, q1, r2;" \
    "\n\tmadc.lo.u64.cc r3, 0x1EABFFFEB153FFFFU, q3, r3;" \
    "\n\tmadc.hi.u64.cc r4, 0x1EABFFFEB153FFFFU, q3, r4;" \
    "\n\tmadc.lo.u64.cc r5, 0x1EABFFFEB153FFFFU, q5, r5;" \
    "\n\tmadc.hi.u64    r6, 0x1EABFFFEB153FFFFU, q5, r6;" \
    "\n\tmad.lo.u64.cc  r2, 0x1EABFFFEB153FFFFU, q2, r2;" \
    "\n\tmadc.hi.u64.cc r3, 0x1EABFFFEB153FFFFU, q2, r3;" \
    "\n\tmadc.lo.u64.cc r4, 0x1EABFFFEB153FFFFU, q4, r4;" \
    "\n\tmadc.hi.u64.cc r5, 0x1EABFFFEB153FFFFU, q4, r5;" \
    "\n\tmadc.lo.u64    r6, 0x1EABFFFEB153FFFFU, q6, r6;" \
    /* p0 = 0xB9FEFFFFFFFFAAAB: initialises r0 */ \
    "\n\tmul.lo.u64     r0, 0xB9FEFFFFFFFFAAABU, q1    ;" \
    "\n\tmad.hi.u64.cc  r1, 0xB9FEFFFFFFFFAAABU, q1, r1;" \
    "\n\tmadc.lo.u64.cc r2, 0xB9FEFFFFFFFFAAABU, q3, r2;" \
    "\n\tmadc.hi.u64.cc r3, 0xB9FEFFFFFFFFAAABU, q3, r3;" \
    "\n\tmadc.lo.u64.cc r4, 0xB9FEFFFFFFFFAAABU, q5, r4;" \
    "\n\tmadc.hi.u64.cc r5, 0xB9FEFFFFFFFFAAABU, q5, r5;" \
    "\n\tmadc.lo.u64    r6, 0xB9FEFFFFFFFFAAABU, q7, r6;" \
    "\n\tmad.lo.u64.cc  r1, 0xB9FEFFFFFFFFAAABU, q2, r1;" \
    "\n\tmadc.hi.u64.cc r2, 0xB9FEFFFFFFFFAAABU, q2, r2;" \
    "\n\tmadc.lo.u64.cc r3, 0xB9FEFFFFFFFFAAABU, q4, r3;" \
    "\n\tmadc.hi.u64.cc r4, 0xB9FEFFFFFFFFAAABU, q4, r4;" \
    "\n\tmadc.lo.u64.cc r5, 0xB9FEFFFFFFFFAAABU, q6, r5;" \
    "\n\tmadc.hi.u64    r6, 0xB9FEFFFFFFFFAAABU, q6, r6;" \
    /* ---- Stage 3: Z5..Z0 -= r5..r0, borrow rippling upward ---- */ \
    /* The last subc has no .cc, so the final borrow is not kept. */ \
    "\n\tsub.u64.cc  "#Z"0, "#Z"0, r0;" \
    "\n\tsubc.u64.cc "#Z"1, "#Z"1, r1;" \
    "\n\tsubc.u64.cc "#Z"2, "#Z"2, r2;" \
    "\n\tsubc.u64.cc "#Z"3, "#Z"3, r3;" \
    "\n\tsubc.u64.cc "#Z"4, "#Z"4, r4;" \
    "\n\tsubc.u64    "#Z"5, "#Z"5, r5;"