FK20 CUDA
fr_addsub.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fr.cuh"
6 #include "fr_add.cuh"
7 #include "fr_sub.cuh"
8 
18 __device__ void fr_addsub(fr_t &x, fr_t &y) {
19  unsigned tid = 0; tid += blockIdx.z;
20  tid *= gridDim.y; tid += blockIdx.y;
21  tid *= gridDim.x; tid += blockIdx.x;
22  tid *= blockDim.z; tid += threadIdx.z;
23  tid *= blockDim.y; tid += threadIdx.y;
24  tid *= blockDim.x; tid += threadIdx.x;
25 
26  uint64_t
27  x0 = x[0], y0 = y[0],
28  x1 = x[1], y1 = y[1],
29  x2 = x[2], y2 = y[2],
30  x3 = x[3], y3 = y[3];
31 
32  asm volatile (
33  "\n\t{"
34  "\n\t.reg .u64 t<4>, x<4>, y<4>;"
35  "\n\t.reg .u32 t4;"
36  "\n\t.reg .pred nz;"
37 
38  "\n\tmov.u64 x0, %0;"
39  "\n\tmov.u64 x1, %1;"
40  "\n\tmov.u64 x2, %2;"
41  "\n\tmov.u64 x3, %3;"
42 
43  "\n\tmov.u64 y0, %4;"
44  "\n\tmov.u64 y1, %5;"
45  "\n\tmov.u64 y2, %6;"
46  "\n\tmov.u64 y3, %7;"
47 
48  FR_ADD(t, x, y)
49  FR_SUB(y, x, y)
50 
51  "\n\tmov.u64 %0, t0;"
52  "\n\tmov.u64 %1, t1;"
53  "\n\tmov.u64 %2, t2;"
54  "\n\tmov.u64 %3, t3;"
55 
56  "\n\tmov.u64 %4, y0;"
57  "\n\tmov.u64 %5, y1;"
58  "\n\tmov.u64 %6, y2;"
59  "\n\tmov.u64 %7, y3;"
60 
61  "\n\t}"
62  :
63  "+l"(x0), "+l"(x1), "+l"(x2), "+l"(x3),
64  "+l"(y0), "+l"(y1), "+l"(y2), "+l"(y3)
65  );
66 
67  x[0] = x0, x[1] = x1, x[2] = x2, x[3] = x3;
68  y[0] = y0, y[1] = y1, y[2] = y2, y[3] = y3;
69 }
70 
71 // vim: ts=4 et sw=4 si
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
Definition: fr.cuh:24
#define FR_ADD(Z, X, Y)
Device macro for Z = X+Y with overflow check.
Definition: fr_add.cuh:14
__device__ void fr_addsub(fr_t &x, fr_t &y)
Computes the sum and the difference of the arguments, storing back into the arguments: (x,...
Definition: fr_addsub.cu:18
#define FR_SUB(Z, X, Y)
Macro for Z=X-Y. Consider that X is in registers X0..X3 and Y in Y0..Y3. Z and X can overlap.
Definition: fr_sub.cuh:10