19 unsigned tid = 0; tid += blockIdx.z;
20 tid *= gridDim.y; tid += blockIdx.y;
21 tid *= gridDim.x; tid += blockIdx.x;
22 tid *= blockDim.z; tid += threadIdx.z;
23 tid *= blockDim.y; tid += threadIdx.y;
24 tid *= blockDim.x; tid += threadIdx.x;
34 "\n\t.reg .u64 t<4>, x<4>, y<4>;"
63 "+l"(x0),
"+l"(x1),
"+l"(x2),
"+l"(x3),
64 "+l"(y0),
"+l"(y1),
"+l"(y2),
"+l"(y3)
67 x[0] = x0, x[1] = x1, x[2] = x2, x[3] = x3;
68 y[0] = y0, y[1] = y1, y[2] = y2, y[3] = y3;
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
#define FR_ADD(Z, X, Y)
Device macro for Z = X+Y with overflow check.
__device__ void fr_addsub(fr_t &x, fr_t &y)
Computes the sum and the difference of the arguments, storing back into the arguments: (x, y) := (x + y, x - y).
#define FR_SUB(Z, X, Y)
Macro for Z = X - Y. Assumes X is held in registers X0..X3 and Y in registers Y0..Y3. Z and X may overlap.