FK20 CUDA
fr_sub.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fr.cuh"
6 #include "fr_sub.cuh"
7 
/**
 * @brief Subtracts x from z modulo p and stores the difference in z (z = z - x).
 *
 * The four 64-bit limbs of each operand are copied into local variables,
 * passed to an inline PTX block that applies the FR_SUB macro to them, and
 * the resulting limbs are written back to z. Every limb of x is read before
 * any limb of z is written.
 *
 * NOTE(review): the range/canonical form of the result is decided by FR_SUB
 * (fr_sub.cuh), which is not visible here - confirm there.
 *
 * @param[in,out] z Minuend on entry; overwritten with the difference.
 * @param[in]     x Subtrahend; only read.
 */
17 __device__ void fr_sub(fr_t &z, const fr_t &x) {
    // Copy each limb of both operands into a local so it can be bound to an asm operand.
18  uint64_t
19  z0 = z[0], x0 = x[0],
20  z1 = z[1], x1 = x[1],
21  z2 = z[2], x2 = x[2],
22  z3 = z[3], x3 = x[3];
23 
24  asm volatile (
    // The braces open a PTX scope, keeping the register names declared below local to this block.
25  "\n\t{"
    // x0..x3 / z0..z3 receive the operand limbs. t4 and nz are not referenced in this
    // file - presumably scratch registers expected by FR_SUB; verify in fr_sub.cuh.
26  "\n\t.reg .u64 x<4>, z<4>;"
27  "\n\t.reg .u32 t4;"
28  "\n\t.reg .pred nz;"
29 
    // Operands %0-%3 are the limbs of z.
30  "\n\tmov.u64 z0, %0;"
31  "\n\tmov.u64 z1, %1;"
32  "\n\tmov.u64 z2, %2;"
33  "\n\tmov.u64 z3, %3;"
34 
    // Operands %4-%7 are the limbs of x.
35  "\n\tmov.u64 x0, %4;"
36  "\n\tmov.u64 x1, %5;"
37  "\n\tmov.u64 x2, %6;"
38  "\n\tmov.u64 x3, %7;"
39 
    // FR_SUB(Z, X, Y) performs Z = X - Y on register groups named by prefix,
    // so this is z = z - x with the result left in z0..z3.
40 FR_SUB(z, z, x)
41 
    // Move the result limbs back into the read-write operands.
42  "\n\tmov.u64 %0, z0;"
43  "\n\tmov.u64 %1, z1;"
44  "\n\tmov.u64 %2, z2;"
45  "\n\tmov.u64 %3, z3;"
46 
47  "\n\t}"
48  :
    // "+l": 64-bit register operands that are both read and written (the z limbs).
49  "+l"(z0), "+l"(z1), "+l"(z2), "+l"(z3)
50  :
    // "l": 64-bit register operands that are read only (the x limbs).
51  "l"(x0), "l"(x1), "l"(x2), "l"(x3)
52  );
53 
    // Store the updated limbs back into the caller's z.
54  z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3;
55 }
56 
57 // vim: ts=4 et sw=4 si
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
Definition: fr.cuh:24
__device__ void fr_sub(fr_t &z, const fr_t &x)
Calculates the difference of two residues modulo p and stores it into z.
Definition: fr_sub.cu:17
#define FR_SUB(Z, X, Y)
Macro computing Z = X - Y. X is expected in registers X0..X3 and Y in registers Y0..Y3. Z and X may overlap.
Definition: fr_sub.cuh:10