FK20 CUDA
fr_x12.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fr.cuh"
6 
13 __device__ void fr_x12(fr_t &z) {
14  uint64_t
15  z0 = z[0],
16  z1 = z[1],
17  z2 = z[2],
18  z3 = z[3];
19 
20  asm volatile (
21  "\n\t{"
22  "\n\t.reg .u64 t<4>;"
23  "\n\t.reg .u32 t4;"
24  "\n\t.reg .pred gt;"
25 
26  // t = z + z
27 
28  "\n\tadd.u64.cc t0, %0, %0;"
29  "\n\taddc.u64.cc t1, %1, %1;"
30  "\n\taddc.u64.cc t2, %2, %2;"
31  "\n\taddc.u64.cc t3, %3, %3;"
32  "\n\taddc.u32 t4, 0, 0;"
33 
34  // z = z + t
35 
36  "\n\tadd.u64.cc %0, %0, t0;"
37  "\n\taddc.u64.cc %1, %1, t1;"
38  "\n\taddc.u64.cc %2, %2, t2;"
39  "\n\taddc.u64.cc %3, %3, t3;"
40  "\n\taddc.u32 t4, 0, t4;"
41 
42  // z = z + z
43 
44  "\n\tadd.u64.cc %0, %0, %0;"
45  "\n\taddc.u64.cc %1, %1, %1;"
46  "\n\taddc.u64.cc %2, %2, %2;"
47  "\n\taddc.u64.cc %3, %3, %3;"
48  "\n\taddc.u32 t4, t4, t4;"
49 
50  // z = z + z
51 
52  "\n\tadd.u64.cc %0, %0, %0;"
53  "\n\taddc.u64.cc %1, %1, %1;"
54  "\n\taddc.u64.cc %2, %2, %2;"
55  "\n\taddc.u64.cc %3, %3, %3;"
56  "\n\taddc.u32 t4, t4, t4;"
57 
58  // if z >= 2^259 then z -= 17m
59 
60  "\n\tsetp.gt.u32 gt, t4, 7;"
61  "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFEF00000011U;"
62  "\n@gt\tsubc.u64.cc %1, %1, 0x8F97E432FFE41BEEU;"
63  "\n@gt\tsubc.u64.cc %2, %2, 0x66D75888A3BF585AU;"
64  "\n@gt\tsubc.u64.cc %3, %3, 0xB2C81C85C37551CBU;"
65  "\n@gt\tsubc.u32 t4, t4, 7;"
66 
67  // if z >= 2^258 then z -= 8m
68 
69  "\n\tsetp.gt.u32 gt, t4, 3;"
70  "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFF800000008U;"
71  "\n@gt\tsubc.u64.cc %1, %1, 0x9DED2017FFF2DFF7U;"
72  "\n@gt\tsubc.u64.cc %2, %2, 0x99CEC0404D0EC02AU;"
73  "\n@gt\tsubc.u64.cc %3, %3, 0x9F6D3A994CEBEA41U;"
74  "\n@gt\tsubc.u32 t4, t4, 3;"
75 
76  // if z >= 2^257 then z -= 4m
77 
78  "\n\tsetp.gt.u32 gt, t4, 1;"
79  "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFFC00000004U;"
80  "\n@gt\tsubc.u64.cc %1, %1, 0x4EF6900BFFF96FFBU;"
81  "\n@gt\tsubc.u64.cc %2, %2, 0xCCE7602026876015U;"
82  "\n@gt\tsubc.u64.cc %3, %3, 0xCFB69D4CA675F520U;"
83  "\n@gt\tsubc.u32 t4, t4, 1;"
84 
85  // if z >= 2^256 then z -= 2m
86 
87  "\n\tsetp.gt.u32 gt, t4, 0;"
88  "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFFE00000002U;"
89  "\n@gt\tsubc.u64.cc %1, %1, 0xA77B4805FFFCB7FDU;"
90  "\n@gt\tsubc.u64.cc %2, %2, 0x6673B0101343B00AU;"
91  "\n@gt\tsubc.u64.cc %3, %3, 0xE7DB4EA6533AFA90U;"
92  "\n@gt\tsubc.u32 t4, t4, 0;"
93 
94  // if z >= 2^256 then z -= 2m
95 
96  "\n\tsetp.gt.u32 gt, t4, 0;"
97  "\n@gt\tsub.u64.cc %0, %0, 0xFFFFFFFE00000002U;"
98  "\n@gt\tsubc.u64.cc %1, %1, 0xA77B4805FFFCB7FDU;"
99  "\n@gt\tsubc.u64.cc %2, %2, 0x6673B0101343B00AU;"
100  "\n@gt\tsubc.u64 %3, %3, 0xE7DB4EA6533AFA90U;"
101 
102  "\n\t}"
103  :
104  "+l"(z0), "+l"(z1), "+l"(z2), "+l"(z3));
105 
106  z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3;
107 }
108 
109 // vim: ts=4 et sw=4 si
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
Definition: fr.cuh:24
__device__ void fr_x12(fr_t &z)
Multiply the residue mod r z by 12 with weak reduction.
Definition: fr_x12.cu:13