28 "\n\tadd.u64.cc t0, %0, %0;"
29 "\n\taddc.u64.cc t1, %1, %1;"
30 "\n\taddc.u64.cc t2, %2, %2;"
31 "\n\taddc.u64.cc t3, %3, %3;"
32 "\n\taddc.u32 t4, 0, 0;"
36 "\n\tadd.u64.cc %0, %0, t0;"
37 "\n\taddc.u64.cc %1, %1, t1;"
38 "\n\taddc.u64.cc %2, %2, t2;"
39 "\n\taddc.u64.cc %3, %3, t3;"
40 "\n\taddc.u32 t4, 0, t4;"
44 "\n\tadd.u64.cc %0, %0, %0;"
45 "\n\taddc.u64.cc %1, %1, %1;"
46 "\n\taddc.u64.cc %2, %2, %2;"
47 "\n\taddc.u64.cc %3, %3, %3;"
48 "\n\taddc.u32 t4, t4, t4;"
52 "\n\tadd.u64.cc %0, %0, %0;"
53 "\n\taddc.u64.cc %1, %1, %1;"
54 "\n\taddc.u64.cc %2, %2, %2;"
55 "\n\taddc.u64.cc %3, %3, %3;"
56 "\n\taddc.u32 t4, t4, t4;"
60 "\n\tsetp.gt.u32 gt, t4, 7;"
61 "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFEF00000011U;"
62 "\n@gt\tsubc.u64.cc %1, %1, 0x8F97E432FFE41BEEU;"
63 "\n@gt\tsubc.u64.cc %2, %2, 0x66D75888A3BF585AU;"
64 "\n@gt\tsubc.u64.cc %3, %3, 0xB2C81C85C37551CBU;"
65 "\n@gt\tsubc.u32 t4, t4, 7;"
69 "\n\tsetp.gt.u32 gt, t4, 3;"
70 "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFF800000008U;"
71 "\n@gt\tsubc.u64.cc %1, %1, 0x9DED2017FFF2DFF7U;"
72 "\n@gt\tsubc.u64.cc %2, %2, 0x99CEC0404D0EC02AU;"
73 "\n@gt\tsubc.u64.cc %3, %3, 0x9F6D3A994CEBEA41U;"
74 "\n@gt\tsubc.u32 t4, t4, 3;"
78 "\n\tsetp.gt.u32 gt, t4, 1;"
79 "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFFC00000004U;"
80 "\n@gt\tsubc.u64.cc %1, %1, 0x4EF6900BFFF96FFBU;"
81 "\n@gt\tsubc.u64.cc %2, %2, 0xCCE7602026876015U;"
82 "\n@gt\tsubc.u64.cc %3, %3, 0xCFB69D4CA675F520U;"
83 "\n@gt\tsubc.u32 t4, t4, 1;"
87 "\n\tsetp.gt.u32 gt, t4, 0;"
88 "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFFE00000002U;"
89 "\n@gt\tsubc.u64.cc %1, %1, 0xA77B4805FFFCB7FDU;"
90 "\n@gt\tsubc.u64.cc %2, %2, 0x6673B0101343B00AU;"
91 "\n@gt\tsubc.u64.cc %3, %3, 0xE7DB4EA6533AFA90U;"
92 "\n@gt\tsubc.u32 t4, t4, 0;"
96 "\n\tsetp.gt.u32 gt, t4, 0;"
97 "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFFE00000002U;"
98 "\n@gt\tsubc.u64.cc %1, %1, 0xA77B4805FFFCB7FDU;"
99 "\n@gt\tsubc.u64.cc %2, %2, 0x6673B0101343B00AU;"
100 "\n@gt\tsubc.u64 %3, %3, 0xE7DB4EA6533AFA90U;"
104 "+l"(z0),
"+l"(z1),
"+l"(z2),
"+l"(z3));
106 z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3;
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
__device__ void fr_x12(fr_t &z)
Multiply the residue mod r z by 12 with weak reduction.