FK20 CUDA
fr_add.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fr.cuh"
6 #include "fr_add.cuh"
7 
// fr_add: in-place modular addition, z = (z + x) mod r. Device-only function.
//
// Parameters:
//   z  in/out: first summand on entry, receives the sum on return
//   x  in:     second summand, read only
//
// Both operands are fr_t values, i.e. four uint64_t limbs each. All eight limbs
// are copied into locals before anything is written back to z.
16 __device__ void fr_add(fr_t &z, const fr_t &x) {
// Local copies of the four limbs of z and of x; these are the asm operands.
17  uint64_t
18  z0 = z[0], x0 = x[0],
19  z1 = z[1], x1 = x[1],
20  z2 = z[2], x2 = x[2],
21  z3 = z[3], x3 = x[3];
22 
// One inline PTX statement: load operands, expand FR_ADD, store the result.
23  asm volatile (
// The braces open a PTX scope so the register names declared here stay local.
24  "\n\t{"
// Scratch registers: x0..x3 and z0..z3 (64-bit), t4 (32-bit) and predicate nz.
// t4 and nz are not referenced by any string visible in this function;
// presumably the FR_ADD expansion uses them -- TODO confirm in fr_add.cuh.
25  "\n\t.reg .u64 x<4>, z<4>;"
26  "\n\t.reg .u32 t4;"
27  "\n\t.reg .pred nz;"
28 
// Copy the read-write operands %0..%3 (limbs of z) into PTX registers z0..z3.
29  "\n\tmov.u64 z0, %0;"
30  "\n\tmov.u64 z1, %1;"
31  "\n\tmov.u64 z2, %2;"
32  "\n\tmov.u64 z3, %3;"
33 
// Copy the input operands %4..%7 (limbs of x) into PTX registers x0..x3.
34  "\n\tmov.u64 x0, %4;"
35  "\n\tmov.u64 x1, %5;"
36  "\n\tmov.u64 x2, %6;"
37  "\n\tmov.u64 x3, %7;"
38 
// FR_ADD(Z, X, Y) is a macro from fr_add.cuh ("Z = X+Y with overflow check");
// here it is invoked as z = z + x. Its expansion is spliced in between the
// surrounding string literals, so it is assumed to yield PTX text operating on
// the register names declared above -- verify against the macro definition.
39 FR_ADD(z, z, x)
40 
// Copy the result from PTX registers z0..z3 back to the output operands %0..%3.
41  "\n\tmov.u64 %0, z0;"
42  "\n\tmov.u64 %1, z1;"
43  "\n\tmov.u64 %2, z2;"
44  "\n\tmov.u64 %3, z3;"
45 
46  "\n\t}"
// Output operand list: "+l" marks each z limb as both read and written.
47  :
48  "+l"(z0), "+l"(z1), "+l"(z2), "+l"(z3)
// Input operand list: the x limbs are read only.
49  :
50  "l"(x0), "l"(x1), "l"(x2), "l"(x3)
51  );
52 
// Store the four result limbs back into z.
53  z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3;
54 }
55 
56 // vim: ts=4 et sw=4 si
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t).
Definition: fr.cuh:24
__device__ void fr_add(fr_t &z, const fr_t &x)
Computes the sum of two residues x and z modulo r and stores it in z. Device only function.
Definition: fr_add.cu:16
#define FR_ADD(Z, X, Y)
Device macro for Z = X+Y with overflow check.
Definition: fr_add.cuh:14