18         x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5],
 
   19         z0 = z[0], z1 = z[1], z2 = z[2], z3 = z[3], z4 = z[4], z5 = z[5];
 
   23     "\n\t.reg .u64 z<6>, x<6>;" 
   24     "\n\t.reg .u64 u<10>, ua, ub;" 
   32     "\n\tmov.u64 x4, %10;" 
   33     "\n\tmov.u64 x5, %11;" 
   47     "=l"(z0), 
"=l"(z1), 
"=l"(z2), 
"=l"(z3), 
"=l"(z4), 
"=l"(z5)
 
   49     "l"(x0), 
"l"(x1), 
"l"(x2), 
"l"(x3), 
"l"(x4), 
"l"(x5)
 
   52     z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4; z[5] = z5;
 
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element little-endian array of uint64_t.
#define FP_REDUCE12(Z)
Wide reduction over 12 words.
__device__ void fp_sqr(fp_t &z, const fp_t &x)
Computes the square of the residue x modulo p and stores it in z.
#define FP_SQR(Z, X)
PTX macro for computing the square of the residue x modulo p. Stores in z.