/*
 * FP_REDUCE12(Z)
 *
 * Expands to a sequence of adjacent string literals, one PTX instruction
 * each, meant to be pasted into an inline-asm statement.  Z is a register
 * name prefix: the macro stringifies it and appends one hex digit, so
 * FP_REDUCE12(z) refers to registers z0 .. zb (twelve 64-bit words).
 *
 * Registers touched
 *   reads     : Z0 .. Zb   (Z4..Zb in step 1, Z0..Z5 in step 3)
 *   writes    : Z0 .. Z5   (Z6..Zb are left unchanged)
 *   scratch   : q0 .. q7, r0 .. r6 -- not declared here; they must already
 *               exist in the asm statement that uses this macro
 *   condition : the carry/borrow flag is overwritten (.cc / madc / addc /
 *               subc); the last instruction of every chain has no .cc, so
 *               no carry or borrow is handed on to code after the macro
 *
 * Outline (m0..m6 = the seven constants of step 1 in order of appearance,
 * n5..n0 = the six constants of step 2 in order of appearance):
 *   1. q0..q7 : column sums of the partial products m_j * Z_k
 *   2. r0..r6 : low seven words of (q1..q7) * (n0..n5)
 *   3. Z0..Z5 : Z0..Z5 - r0..r5, with borrow propagation
 *
 * Interpretation used in the comments below (not stated by the code
 * itself): Zk, m_j, n_j and q_i are 64-bit limbs, least-significant limb
 * has index 0.  Under that reading lo(m_j*Z_k) has weight 2^(64*(j+k)) and
 * hi(m_j*Z_k) has weight 2^(64*(j+k+1)).
 *
 * NOTE(review): n5..n0 look like the limbs of the BLS12-381 base-field
 * modulus p and m6..m0 like floor(2^768 / p), which would make this a
 * Barrett-style reduction of a 12-word value -- confirm against the
 * project's constant definitions.
 * NOTE(review): the macro performs no final conditional subtraction; the
 * range of the 6-word result is not established by this code -- check the
 * call sites.
 * NOTE(review): r6 is computed in step 2 but never consumed by step 3, and
 * q0 is never read in step 2 (it only produces carries into q1) -- confirm
 * whether any caller reads r6 after the macro before removing that work.
 */
12 #define FP_REDUCE12(Z) \
/* ---- Step 1: q_i collects every computed term of weight 2^(64*(11+i)) ---- */ \
/* Terms of weight below 2^(64*11) are never computed, so no carries from them enter q0. */ \
/* m0: hi(m0*Za) + lo(m0*Zb) -> q0; hi(m0*Zb) + carry starts q1. */ \
18 "\n\tmul.hi.u64 q0, 0x13E207F56591BA2EU, "#Z"a;" \
20 "\n\tmad.lo.u64.cc q0, 0x13E207F56591BA2EU, "#Z"b, q0;" \
21 "\n\tmadc.hi.u64 q1, 0x13E207F56591BA2EU, "#Z"b, 0;" \
/* m1: two carry chains.  The first ends by creating q2 from hi(m1*Zb) + carry; */ \
/* the second ends with addc, folding its final carry into q2. */ \
25 "\n\tmad.hi.u64.cc q0, 0x997167A058F1C07BU, "#Z"9, q0;" \
26 "\n\tmadc.lo.u64.cc q1, 0x997167A058F1C07BU, "#Z"b, q1;" \
27 "\n\tmadc.hi.u64 q2, 0x997167A058F1C07BU, "#Z"b, 0;" \
29 "\n\tmad.lo.u64.cc q0, 0x997167A058F1C07BU, "#Z"a, q0;" \
30 "\n\tmadc.hi.u64.cc q1, 0x997167A058F1C07BU, "#Z"a, q1;" \
31 "\n\taddc.u64 q2, q2, 0;" \
/* m2: same two-chain pattern, one word wider; the first chain creates q3. */ \
35 "\n\tmad.lo.u64.cc q0, 0xDF4771E0286779D3U, "#Z"9, q0;" \
36 "\n\tmadc.hi.u64.cc q1, 0xDF4771E0286779D3U, "#Z"9, q1;" \
37 "\n\tmadc.lo.u64.cc q2, 0xDF4771E0286779D3U, "#Z"b, q2;" \
38 "\n\tmadc.hi.u64 q3, 0xDF4771E0286779D3U, "#Z"b, 0;" \
40 "\n\tmad.hi.u64.cc q0, 0xDF4771E0286779D3U, "#Z"8, q0;" \
41 "\n\tmadc.lo.u64.cc q1, 0xDF4771E0286779D3U, "#Z"a, q1;" \
42 "\n\tmadc.hi.u64.cc q2, 0xDF4771E0286779D3U, "#Z"a, q2;" \
43 "\n\taddc.u64 q3, q3, 0;" \
/* m3: first chain creates q4. */ \
47 "\n\tmad.hi.u64.cc q0, 0x1B82741FF6A0A94BU, "#Z"7, q0;" \
48 "\n\tmadc.lo.u64.cc q1, 0x1B82741FF6A0A94BU, "#Z"9, q1;" \
49 "\n\tmadc.hi.u64.cc q2, 0x1B82741FF6A0A94BU, "#Z"9, q2;" \
50 "\n\tmadc.lo.u64.cc q3, 0x1B82741FF6A0A94BU, "#Z"b, q3;" \
51 "\n\tmadc.hi.u64 q4, 0x1B82741FF6A0A94BU, "#Z"b, 0;" \
53 "\n\tmad.lo.u64.cc q0, 0x1B82741FF6A0A94BU, "#Z"8, q0;" \
54 "\n\tmadc.hi.u64.cc q1, 0x1B82741FF6A0A94BU, "#Z"8, q1;" \
55 "\n\tmadc.lo.u64.cc q2, 0x1B82741FF6A0A94BU, "#Z"a, q2;" \
56 "\n\tmadc.hi.u64.cc q3, 0x1B82741FF6A0A94BU, "#Z"a, q3;" \
57 "\n\taddc.u64 q4, q4, 0;" \
/* m4: first chain creates q5. */ \
61 "\n\tmad.lo.u64.cc q0, 0x28101B0CC7A6BA29U, "#Z"7, q0;" \
62 "\n\tmadc.hi.u64.cc q1, 0x28101B0CC7A6BA29U, "#Z"7, q1;" \
63 "\n\tmadc.lo.u64.cc q2, 0x28101B0CC7A6BA29U, "#Z"9, q2;" \
64 "\n\tmadc.hi.u64.cc q3, 0x28101B0CC7A6BA29U, "#Z"9, q3;" \
65 "\n\tmadc.lo.u64.cc q4, 0x28101B0CC7A6BA29U, "#Z"b, q4;" \
66 "\n\tmadc.hi.u64 q5, 0x28101B0CC7A6BA29U, "#Z"b, 0;" \
68 "\n\tmad.hi.u64.cc q0, 0x28101B0CC7A6BA29U, "#Z"6, q0;" \
69 "\n\tmadc.lo.u64.cc q1, 0x28101B0CC7A6BA29U, "#Z"8, q1;" \
70 "\n\tmadc.hi.u64.cc q2, 0x28101B0CC7A6BA29U, "#Z"8, q2;" \
71 "\n\tmadc.lo.u64.cc q3, 0x28101B0CC7A6BA29U, "#Z"a, q3;" \
72 "\n\tmadc.hi.u64.cc q4, 0x28101B0CC7A6BA29U, "#Z"a, q4;" \
73 "\n\taddc.u64 q5, q5, 0;" \
/* m5: first chain creates q6. */ \
77 "\n\tmad.hi.u64.cc q0, 0xD835D2F3CC9E45CEU, "#Z"5, q0;" \
78 "\n\tmadc.lo.u64.cc q1, 0xD835D2F3CC9E45CEU, "#Z"7, q1;" \
79 "\n\tmadc.hi.u64.cc q2, 0xD835D2F3CC9E45CEU, "#Z"7, q2;" \
80 "\n\tmadc.lo.u64.cc q3, 0xD835D2F3CC9E45CEU, "#Z"9, q3;" \
81 "\n\tmadc.hi.u64.cc q4, 0xD835D2F3CC9E45CEU, "#Z"9, q4;" \
82 "\n\tmadc.lo.u64.cc q5, 0xD835D2F3CC9E45CEU, "#Z"b, q5;" \
83 "\n\tmadc.hi.u64 q6, 0xD835D2F3CC9E45CEU, "#Z"b, 0;" \
85 "\n\tmad.lo.u64.cc q0, 0xD835D2F3CC9E45CEU, "#Z"6, q0;" \
86 "\n\tmadc.hi.u64.cc q1, 0xD835D2F3CC9E45CEU, "#Z"6, q1;" \
87 "\n\tmadc.lo.u64.cc q2, 0xD835D2F3CC9E45CEU, "#Z"8, q2;" \
88 "\n\tmadc.hi.u64.cc q3, 0xD835D2F3CC9E45CEU, "#Z"8, q3;" \
89 "\n\tmadc.lo.u64.cc q4, 0xD835D2F3CC9E45CEU, "#Z"a, q4;" \
90 "\n\tmadc.hi.u64.cc q5, 0xD835D2F3CC9E45CEU, "#Z"a, q5;" \
91 "\n\taddc.u64 q6, q6, 0;" \
/* m6 (= 9): first chain creates q7; Z4 is the lowest input word read in step 1. */ \
95 "\n\tmad.lo.u64.cc q0, 0x0000000000000009U, "#Z"5, q0;" \
96 "\n\tmadc.hi.u64.cc q1, 0x0000000000000009U, "#Z"5, q1;" \
97 "\n\tmadc.lo.u64.cc q2, 0x0000000000000009U, "#Z"7, q2;" \
98 "\n\tmadc.hi.u64.cc q3, 0x0000000000000009U, "#Z"7, q3;" \
99 "\n\tmadc.lo.u64.cc q4, 0x0000000000000009U, "#Z"9, q4;" \
100 "\n\tmadc.hi.u64.cc q5, 0x0000000000000009U, "#Z"9, q5;" \
101 "\n\tmadc.lo.u64.cc q6, 0x0000000000000009U, "#Z"b, q6;" \
102 "\n\tmadc.hi.u64 q7, 0x0000000000000009U, "#Z"b, 0;" \
104 "\n\tmad.hi.u64.cc q0, 0x0000000000000009U, "#Z"4, q0;" \
105 "\n\tmadc.lo.u64.cc q1, 0x0000000000000009U, "#Z"6, q1;" \
106 "\n\tmadc.hi.u64.cc q2, 0x0000000000000009U, "#Z"6, q2;" \
107 "\n\tmadc.lo.u64.cc q3, 0x0000000000000009U, "#Z"8, q3;" \
108 "\n\tmadc.hi.u64.cc q4, 0x0000000000000009U, "#Z"8, q4;" \
109 "\n\tmadc.lo.u64.cc q5, 0x0000000000000009U, "#Z"a, q5;" \
110 "\n\tmadc.hi.u64.cc q6, 0x0000000000000009U, "#Z"a, q6;" \
111 "\n\taddc.u64 q7, q7, 0;" \
/* ---- Step 2: r0..r6 = low seven words of (q1..q7) * (n0..n5) ---- */ \
/* lo(n_j*q_k) lands in r(j+k-1), hi(n_j*q_k) in r(j+k); terms above r6 are omitted. */ \
/* Each chain ends at r6 without .cc, so overflow out of r6 is discarded. */ \
/* n5: initialises r5 and r6. */ \
120 "\n\tmul.lo.u64 r5, 0x1A0111EA397FE69AU, q1 ;" \
121 "\n\tmul.hi.u64 r6, 0x1A0111EA397FE69AU, q1 ;" \
122 "\n\tmad.lo.u64 r6, 0x1A0111EA397FE69AU, q2, r6;" \
/* n4: initialises r4, then two carry chains into r5..r6. */ \
126 "\n\tmul.lo.u64 r4, 0x4B1BA7B6434BACD7U, q1 ;" \
127 "\n\tmad.hi.u64.cc r5, 0x4B1BA7B6434BACD7U, q1, r5;" \
128 "\n\tmadc.lo.u64 r6, 0x4B1BA7B6434BACD7U, q3, r6;" \
130 "\n\tmad.lo.u64.cc r5, 0x4B1BA7B6434BACD7U, q2, r5;" \
131 "\n\tmadc.hi.u64 r6, 0x4B1BA7B6434BACD7U, q2, r6;" \
/* n3: initialises r3, then two carry chains into r4..r6. */ \
135 "\n\tmul.lo.u64 r3, 0x64774B84F38512BFU, q1 ;" \
136 "\n\tmad.hi.u64.cc r4, 0x64774B84F38512BFU, q1, r4;" \
137 "\n\tmadc.lo.u64.cc r5, 0x64774B84F38512BFU, q3, r5;" \
138 "\n\tmadc.hi.u64 r6, 0x64774B84F38512BFU, q3, r6;" \
140 "\n\tmad.lo.u64.cc r4, 0x64774B84F38512BFU, q2, r4;" \
141 "\n\tmadc.hi.u64.cc r5, 0x64774B84F38512BFU, q2, r5;" \
142 "\n\tmadc.lo.u64 r6, 0x64774B84F38512BFU, q4, r6;" \
/* n2: initialises r2, then two carry chains into r3..r6. */ \
146 "\n\tmul.lo.u64 r2, 0x6730D2A0F6B0F624U, q1 ;" \
147 "\n\tmad.hi.u64.cc r3, 0x6730D2A0F6B0F624U, q1, r3;" \
148 "\n\tmadc.lo.u64.cc r4, 0x6730D2A0F6B0F624U, q3, r4;" \
149 "\n\tmadc.hi.u64.cc r5, 0x6730D2A0F6B0F624U, q3, r5;" \
150 "\n\tmadc.lo.u64 r6, 0x6730D2A0F6B0F624U, q5, r6;" \
152 "\n\tmad.lo.u64.cc r3, 0x6730D2A0F6B0F624U, q2, r3;" \
153 "\n\tmadc.hi.u64.cc r4, 0x6730D2A0F6B0F624U, q2, r4;" \
154 "\n\tmadc.lo.u64.cc r5, 0x6730D2A0F6B0F624U, q4, r5;" \
155 "\n\tmadc.hi.u64 r6, 0x6730D2A0F6B0F624U, q4, r6;" \
/* n1: initialises r1, then two carry chains into r2..r6. */ \
159 "\n\tmul.lo.u64 r1, 0x1EABFFFEB153FFFFU, q1 ;" \
160 "\n\tmad.hi.u64.cc r2, 0x1EABFFFEB153FFFFU, q1, r2;" \
161 "\n\tmadc.lo.u64.cc r3, 0x1EABFFFEB153FFFFU, q3, r3;" \
162 "\n\tmadc.hi.u64.cc r4, 0x1EABFFFEB153FFFFU, q3, r4;" \
163 "\n\tmadc.lo.u64.cc r5, 0x1EABFFFEB153FFFFU, q5, r5;" \
164 "\n\tmadc.hi.u64 r6, 0x1EABFFFEB153FFFFU, q5, r6;" \
166 "\n\tmad.lo.u64.cc r2, 0x1EABFFFEB153FFFFU, q2, r2;" \
167 "\n\tmadc.hi.u64.cc r3, 0x1EABFFFEB153FFFFU, q2, r3;" \
168 "\n\tmadc.lo.u64.cc r4, 0x1EABFFFEB153FFFFU, q4, r4;" \
169 "\n\tmadc.hi.u64.cc r5, 0x1EABFFFEB153FFFFU, q4, r5;" \
170 "\n\tmadc.lo.u64 r6, 0x1EABFFFEB153FFFFU, q6, r6;" \
/* n0: initialises r0, then two carry chains into r1..r6 (q7 is used only here). */ \
174 "\n\tmul.lo.u64 r0, 0xB9FEFFFFFFFFAAABU, q1 ;" \
175 "\n\tmad.hi.u64.cc r1, 0xB9FEFFFFFFFFAAABU, q1, r1;" \
176 "\n\tmadc.lo.u64.cc r2, 0xB9FEFFFFFFFFAAABU, q3, r2;" \
177 "\n\tmadc.hi.u64.cc r3, 0xB9FEFFFFFFFFAAABU, q3, r3;" \
178 "\n\tmadc.lo.u64.cc r4, 0xB9FEFFFFFFFFAAABU, q5, r4;" \
179 "\n\tmadc.hi.u64.cc r5, 0xB9FEFFFFFFFFAAABU, q5, r5;" \
180 "\n\tmadc.lo.u64 r6, 0xB9FEFFFFFFFFAAABU, q7, r6;" \
182 "\n\tmad.lo.u64.cc r1, 0xB9FEFFFFFFFFAAABU, q2, r1;" \
183 "\n\tmadc.hi.u64.cc r2, 0xB9FEFFFFFFFFAAABU, q2, r2;" \
184 "\n\tmadc.lo.u64.cc r3, 0xB9FEFFFFFFFFAAABU, q4, r3;" \
185 "\n\tmadc.hi.u64.cc r4, 0xB9FEFFFFFFFFAAABU, q4, r4;" \
186 "\n\tmadc.lo.u64.cc r5, 0xB9FEFFFFFFFFAAABU, q6, r5;" \
187 "\n\tmadc.hi.u64 r6, 0xB9FEFFFFFFFFAAABU, q6, r6;" \
/* ---- Step 3: Z0..Z5 -= r0..r5 with borrow propagation ---- */ \
/* The last subc has no .cc, so the final borrow is dropped; r6 is not used here. */ \
195 "\n\tsub.u64.cc "#Z"0, "#Z"0, r0;" \
196 "\n\tsubc.u64.cc "#Z"1, "#Z"1, r1;" \
197 "\n\tsubc.u64.cc "#Z"2, "#Z"2, r2;" \
198 "\n\tsubc.u64.cc "#Z"3, "#Z"3, r3;" \
199 "\n\tsubc.u64.cc "#Z"4, "#Z"4, r4;" \
200 "\n\tsubc.u64 "#Z"5, "#Z"5, r5;"