FK20 CUDA
fr_x3.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fr.cuh"
12 __device__ void fr_x3(fr_t &z) {
13  uint64_t
14  z0 = z[0],
15  z1 = z[1],
16  z2 = z[2],
17  z3 = z[3];
18 
19  asm volatile (
20  "\n\t{"
21  "\n\t.reg .u64 t<4>;"
22  "\n\t.reg .u32 t4;"
23  "\n\t.reg .pred cp;"
24 
25  // t = z + z
26 
27  "\n\tadd.u64.cc t0, %0, %0;"
28  "\n\taddc.u64.cc t1, %1, %1;"
29  "\n\taddc.u64.cc t2, %2, %2;"
30  "\n\taddc.u64.cc t3, %3, %3;"
31  "\n\taddc.u32 t4, 0, 0;"
32 
33  // if z >= 2^256 then z -= mmu0
34 
35  "\n\tsetp.ge.u32 cp, t4, 1;"
36  "\n@cp\tsub.u64.cc %0, %0, 0xFFFFFFFE00000002U;"
37  "\n@cp\tsubc.u64.cc %1, %1, 0xA77B4805FFFCB7FDU;"
38  "\n@cp\tsubc.u64.cc %2, %2, 0x6673B0101343B00AU;"
39  "\n@cp\tsubc.u64.cc %3, %3, 0xE7DB4EA6533AFA90U;"
40  "\n@cp\tsubc.u32 t4, t4, 0;"
41 
42  // z = z + t
43 
44  "\n\tadd.u64.cc %0, %0, t0;"
45  "\n\taddc.u64.cc %1, %1, t1;"
46  "\n\taddc.u64.cc %2, %2, t2;"
47  "\n\taddc.u64.cc %3, %3, t3;"
48  "\n\taddc.u32 t4, 0, t4;"
49 
50  // if z >= 2^256 then z -= mmu0
51 
52  "\n\tsetp.ge.u32 cp, t4, 1;"
53  "\n@cp\tsub.u64.cc %0, %0, 0xFFFFFFFE00000002U;"
54  "\n@cp\tsubc.u64.cc %1, %1, 0xA77B4805FFFCB7FDU;"
55  "\n@cp\tsubc.u64.cc %2, %2, 0x6673B0101343B00AU;"
56  "\n@cp\tsubc.u64.cc %3, %3, 0xE7DB4EA6533AFA90U;"
57  "\n@cp\tsubc.u32 t4, t4, 0;"
58 
59  // if z >= 2^256 then z -= mmu0
60 
61  "\n\tsetp.ge.u32 cp, t4, 1;"
62  "\n@cp\tsub.u64.cc %0, %0, 0xFFFFFFFE00000002U;"
63  "\n@cp\tsubc.u64.cc %1, %1, 0xA77B4805FFFCB7FDU;"
64  "\n@cp\tsubc.u64.cc %2, %2, 0x6673B0101343B00AU;"
65  "\n@cp\tsubc.u64 %3, %3, 0xE7DB4EA6533AFA90U;"
66 
67  "\n\t}"
68  :
69  "+l"(z0), "+l"(z1), "+l"(z2), "+l"(z3));
70 
71  z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3;
72 }
73 
74 // vim: ts=4 et sw=4 si
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
Definition: fr.cuh:24
__device__ void fr_x3(fr_t &z)
Multiply z by 3, and stores in z, with weak reduction.
Definition: fr_x3.cu:12