25 "\n\t.reg .u64 t<7>, q5;"
30 "\n\tmul.hi.u64 q5, %5, 9;"
34 "\n\tmul.lo.u64 t0, q5, 0xB9FEFFFFFFFFAAABU;"
35 "\n\tmul.hi.u64 t1, q5, 0xB9FEFFFFFFFFAAABU;"
37 "\n\tmul.lo.u64 t2, q5, 0x6730D2A0F6B0F624U;"
38 "\n\tmul.hi.u64 t3, q5, 0x6730D2A0F6B0F624U;"
40 "\n\tmul.lo.u64 t4, q5, 0x4B1BA7B6434BACD7U;"
41 "\n\tmul.hi.u64 t5, q5, 0x4B1BA7B6434BACD7U;"
44 "\n\tmad.lo.u64.cc t1, q5, 0x1EABFFFEB153FFFFU, t1;"
45 "\n\tmadc.hi.u64.cc t2, q5, 0x1EABFFFEB153FFFFU, t2;"
47 "\n\tmadc.lo.u64.cc t3, q5, 0x64774B84F38512BFU, t3;"
48 "\n\tmadc.hi.u64.cc t4, q5, 0x64774B84F38512BFU, t4;"
50 "\n\tmadc.lo.u64.cc t5, q5, 0x1A0111EA397FE69AU, t5;"
51 "\n\tmadc.hi.u64.cc t6, q5, 0x1A0111EA397FE69AU, 0;"
61 "\n\tsub.u64.cc %0, %0, t0;"
62 "\n\tsubc.u64.cc %1, %1, t1;"
63 "\n\tsubc.u64.cc %2, %2, t2;"
64 "\n\tsubc.u64.cc %3, %3, t3;"
65 "\n\tsubc.u64.cc %4, %4, t4;"
66 "\n\tsubc.u64 %5, %5, t5;"
68 "\n\tsub.u64.cc t0, %0, 0xB9FEFFFFFFFFAAABU;"
69 "\n\tsubc.u64.cc t1, %1, 0x1EABFFFEB153FFFFU;"
70 "\n\tsubc.u64.cc t2, %2, 0x6730D2A0F6B0F624U;"
71 "\n\tsubc.u64.cc t3, %3, 0x64774B84F38512BFU;"
72 "\n\tsubc.u64.cc t4, %4, 0x4B1BA7B6434BACD7U;"
73 "\n\tsubc.u64.cc t5, %5, 0x1A0111EA397FE69AU;"
74 "\n\tsubc.u64 t6, 0, 0;"
75 "\n\tsetp.ne.u64 nz, t6, 0;"
77 "\n@!nz\tmov.u64 %0, t0;"
78 "\n@!nz\tmov.u64 %1, t1;"
79 "\n@!nz\tmov.u64 %2, t2;"
80 "\n@!nz\tmov.u64 %3, t3;"
81 "\n@!nz\tmov.u64 %4, t4;"
82 "\n@!nz\tmov.u64 %5, t5;"
84 "\n\tsub.u64.cc t0, %0, 0xB9FEFFFFFFFFAAABU;"
85 "\n\tsubc.u64.cc t1, %1, 0x1EABFFFEB153FFFFU;"
86 "\n\tsubc.u64.cc t2, %2, 0x6730D2A0F6B0F624U;"
87 "\n\tsubc.u64.cc t3, %3, 0x64774B84F38512BFU;"
88 "\n\tsubc.u64.cc t4, %4, 0x4B1BA7B6434BACD7U;"
89 "\n\tsubc.u64.cc t5, %5, 0x1A0111EA397FE69AU;"
90 "\n\tsubc.u64 t6, 0, 0;"
91 "\n\tsetp.ne.u64 nz, t6, 0;"
93 "\n@!nz\tmov.u64 %0, t0;"
94 "\n@!nz\tmov.u64 %1, t1;"
95 "\n@!nz\tmov.u64 %2, t2;"
96 "\n@!nz\tmov.u64 %3, t3;"
97 "\n@!nz\tmov.u64 %4, t4;"
98 "\n@!nz\tmov.u64 %5, t5;"
102 "+l"(z0),
"+l"(z1),
"+l"(z2),
"+l"(z3),
"+l"(z4),
"+l"(z5)
105 z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3, z[4] = z4, z[5] = z5;
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
__device__ void fp_reduce6(fp_t &z)
Narrow reduction of a residue modulo p, reducing to the canonical representation.