FK20 CUDA
fr_reduce4.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fr.cuh"
6 
16 __device__ void fr_reduce4(fr_t &z) {
17  uint64_t
18  z0 = z[0],
19  z1 = z[1],
20  z2 = z[2],
21  z3 = z[3];
22 
23  asm volatile (
24  "\n\t{"
25  "\n\t.reg .u64 t<5>;"
26  "\n\t.reg .pred cp;"
27 
28  // If z > 2^192*floor(r/2^192), then z -= r
29 
30  "\n\tsetp.gt.u64 cp, %3, 0x73EDA753299D7D48U;"
31 
32  "\n@cp\tsub.u64.cc %0, %0, 0xFFFFFFFF00000001U;"
33  "\n@cp\tsubc.u64.cc %1, %1, 0x53BDA402FFFE5BFEU;"
34  "\n@cp\tsubc.u64.cc %2, %2, 0x3339D80809A1D805U;"
35  "\n@cp\tsubc.u64.cc %3, %3, 0x73EDA753299D7D48U;"
36 
37  // t = z - r
38 
39  "\n\tsub.u64.cc t0, %0, 0xFFFFFFFF00000001U;"
40  "\n\tsubc.u64.cc t1, %1, 0x53BDA402FFFE5BFEU;"
41  "\n\tsubc.u64.cc t2, %2, 0x3339D80809A1D805U;"
42  "\n\tsubc.u64.cc t3, %3, 0x73EDA753299D7D48U;"
43  "\n\tsubc.u64 t4, 0, 0;"
44 
45  // If no underflow, then z = t
46 
47  "\n\tsetp.eq.u64 cp, t4, 0;"
48 
49  "\n@cp\tmov.u64 %0, t0;"
50  "\n@cp\tmov.u64 %1, t1;"
51  "\n@cp\tmov.u64 %2, t2;"
52  "\n@cp\tmov.u64 %3, t3;"
53 
54  "\n\t}"
55  :
56  "+l"(z0), "+l"(z1), "+l"(z2), "+l"(z3)
57  );
58 
59  z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3;
60 }
61 
62 // vim: ts=4 et sw=4 si
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
Definition: fr.cuh:24
__device__ void fr_reduce4(fr_t &z)
Reduced the value in fr_t to the field modulus.
Definition: fr_reduce4.cu:16