uint64_t v0 = v[0], v1 = v[1], v2 = v[2], v3 = v[3], v4 = v[4], v5 = v[5],
         w0 = w[0], w1 = w[1], w2 = w[2], w3 = w[3], w4 = w[4], w5 = w[5],
         x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5],
         y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4], y5 = y[5],
         z0, z1, z2, z3, z4, z5;
 
asm("\n\t.reg .u64 v<6>, w<6>, x<6>, y<6>;"
    "\n\t.reg .u64 u<10>, ua, ub;"
    ...
    // load the operand limbs into PTX registers
    "\n\tmov.u64 v4, %10;"
    "\n\tmov.u64 v5, %11;"

    "\n\tmov.u64 w0, %12;"
    "\n\tmov.u64 w1, %13;"
    "\n\tmov.u64 w2, %14;"
    "\n\tmov.u64 w3, %15;"
    "\n\tmov.u64 w4, %16;"
    "\n\tmov.u64 w5, %17;"

    "\n\tmov.u64 x0, %18;"
    "\n\tmov.u64 x1, %19;"
    "\n\tmov.u64 x2, %20;"
    "\n\tmov.u64 x3, %21;"
    "\n\tmov.u64 x4, %22;"
    "\n\tmov.u64 x5, %23;"

    "\n\tmov.u64 y0, %24;"
    "\n\tmov.u64 y1, %25;"
    "\n\tmov.u64 y2, %26;"
    "\n\tmov.u64 y3, %27;"
    "\n\tmov.u64 y4, %28;"
    "\n\tmov.u64 y5, %29;"
    ...
    // 12-word add with a single carry chain: u += {w5..w0 : v5..v0}
    // (note: PTX puts the .cc carry modifier before the type)
    "\n\tadd.cc.u64  u0, u0, v0;"
    "\n\taddc.cc.u64 u1, u1, v1;"
    "\n\taddc.cc.u64 u2, u2, v2;"
    "\n\taddc.cc.u64 u3, u3, v3;"
    "\n\taddc.cc.u64 u4, u4, v4;"
    "\n\taddc.cc.u64 u5, u5, v5;"
    "\n\taddc.cc.u64 u6, u6, w0;"
    "\n\taddc.cc.u64 u7, u7, w1;"
    "\n\taddc.cc.u64 u8, u8, w2;"
    "\n\taddc.cc.u64 u9, u9, w3;"
    "\n\taddc.cc.u64 ua, ua, w4;"
    "\n\taddc.cc.u64 ub, ub, w5;"
    "\n\taddc.u32    uc,  0,  0;"   // capture the carry out of the top word
    ...
    // first conditional correction: if the carry word uc is set,
    // subtract the 6-word constant from the high words u6..ub
    "\n\tsetp.ne.u32 nz, uc, 0;"
    "\n@nz\tsub.cc.u64  u6, u6, 0x89f6fffffffd0003U;"
    "\n@nz\tsubc.cc.u64 u7, u7, 0x140bfff43bf3fffdU;"
    "\n@nz\tsubc.cc.u64 u8, u8, 0xa0b767a8ac38a745U;"
    "\n@nz\tsubc.cc.u64 u9, u9, 0x8831a7ac8fada8baU;"
    "\n@nz\tsubc.cc.u64 ua, ua, 0xa3f8e5685da91392U;"
    "\n@nz\tsubc.cc.u64 ub, ub, 0xea09a13c057f1b6cU;"
    "\n@nz\tsubc.u32    uc, uc, 0;"   // propagate the borrow into uc
    ...
    // second pass of the same correction, in case uc is still nonzero
    "\n\tsetp.ne.u32 nz, uc, 0;"
    "\n@nz\tsub.cc.u64  u6, u6, 0x89f6fffffffd0003U;"
    "\n@nz\tsubc.cc.u64 u7, u7, 0x140bfff43bf3fffdU;"
    "\n@nz\tsubc.cc.u64 u8, u8, 0xa0b767a8ac38a745U;"
    "\n@nz\tsubc.cc.u64 u9, u9, 0x8831a7ac8fada8baU;"
    "\n@nz\tsubc.cc.u64 ua, ua, 0xa3f8e5685da91392U;"
    "\n@nz\tsubc.cc.u64 ub, ub, 0xea09a13c057f1b6cU;"
    ...
    // copy the reduced result to the output operands
    "\n\tmov.u64 %0,  u0;"
    "\n\tmov.u64 %1,  u1;"
    "\n\tmov.u64 %2,  u2;"
    "\n\tmov.u64 %3,  u3;"
    "\n\tmov.u64 %4,  u4;"
    "\n\tmov.u64 %5,  u5;"
    : "=l"(z0), "=l"(z1), "=l"(z2), "=l"(z3), "=l"(z4), "=l"(z5)
    : "l"(v0), "l"(v1), "l"(v2), "l"(v3), "l"(v4), "l"(v5),
      "l"(w0), "l"(w1), "l"(w2), "l"(w3), "l"(w4), "l"(w5),
      "l"(x0), "l"(x1), "l"(x2), "l"(x3), "l"(x4), "l"(x5),
      "l"(y0), "l"(y1), "l"(y2), "l"(y3), "l"(y4), "l"(y5));
z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4; z[5] = z5;
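The reduction tail above corrects an overflow with up to two predicated subtractions. A plain-C model of one such pass may clarify the carry handling; this is a sketch, not part of the API. The function name cond_sub_pass is hypothetical, and K is the 6-word constant subtracted in the PTX (for the residue to be preserved, K must be a multiple of p):

#include <stdint.h>

// 6-word constant from the PTX above, least-significant word first.
static const uint64_t K[6] = {
    0x89f6fffffffd0003ULL, 0x140bfff43bf3fffdULL, 0xa0b767a8ac38a745ULL,
    0x8831a7ac8fada8baULL, 0xa3f8e5685da91392ULL, 0xea09a13c057f1b6cULL
};

// Hypothetical model of one predicated pass: if the carry word uc is nonzero,
// subtract K from the high words u6..ub and propagate the borrow into uc.
static void cond_sub_pass(uint64_t u[6] /* u6..ub */, uint32_t *uc)
{
    if (*uc == 0)                     // the @nz predicate in the PTX
        return;
    uint64_t borrow = 0;
    for (int i = 0; i < 6; i++) {
        uint64_t d  = u[i] - K[i];
        uint64_t b1 = (u[i] < K[i]); // borrow from the subtraction itself
        uint64_t b2 = (d < borrow);  // borrow from subtracting the incoming borrow
        u[i]   = d - borrow;
        borrow = b1 | b2;            // at most one of b1, b2 can be set
    }
    *uc -= (uint32_t)borrow;         // models "subc.u32 uc, uc, 0;"
}

Running the pass twice, as the PTX does, guarantees the carry word is cleared.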
 
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, stored as a 6-element array of uint64_t limbs.
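For concreteness, a minimal sketch of the type as declared above, plus a trivial helper. The helper name fp_set is hypothetical, and limb 0 is assumed to be the least-significant word, matching the carry chain in the listing:

typedef uint64_t fp_t[6];   // 6 x 64-bit limbs = one 384-bit representative

// Hypothetical helper: copy raw limbs into a residue
// (limb 0 = least significant, assumed).
__device__ void fp_set(fp_t z, const uint64_t limbs[6])
{
    for (int i = 0; i < 6; i++)
        z[i] = limbs[i];
}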
__device__ void fp_mma(fp_t &z, const fp_t &v, const fp_t &w, const fp_t &x, const fp_t &y)
Fp multiply-multiply-add: fast evaluation of z = (v*w + x*y) mod p. The two double-wide products are added in a single carry chain and the 12-word sum (plus its carry) is then reduced modulo p, as shown in the listing above.
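A sketch of how fp_mma might be called; only the fp_mma signature comes from the declaration above, while the kernel name and launch shape are illustrative:

// Hypothetical batch kernel: each thread computes
// z[i] = (v[i]*w[i] + x[i]*y[i]) mod p for one element.
__global__ void fp_mma_batch(fp_t *z, const fp_t *v, const fp_t *w,
                             const fp_t *x, const fp_t *y, int count)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < count)
        fp_mma(z[i], v[i], w[i], x[i], y[i]);
}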
#define FP_MUL(Z, X, Y)
PTX macro for multiplication of two residues mod p. Reads X0..X5 and Y0..Y5; writes Z0…
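FP_MUL's PTX body is not shown in this section. As a plain-CUDA model of the double-wide product such a macro computes, here is a schoolbook 6x6-limb multiply; this is a sketch under the assumption of a 12-limb, least-significant-first output, and mul_384_sketch is a hypothetical name, not this macro's implementation:

// Schoolbook 384 x 384 -> 768-bit multiply: zz = xx * yy.
__device__ void mul_384_sketch(uint64_t zz[12], const uint64_t xx[6],
                               const uint64_t yy[6])
{
    for (int k = 0; k < 12; k++)
        zz[k] = 0;
    for (int i = 0; i < 6; i++) {
        uint64_t carry = 0;
        for (int j = 0; j < 6; j++) {
            uint64_t lo = xx[i] * yy[j];             // low 64 bits of partial product
            uint64_t hi = __umul64hi(xx[i], yy[j]);  // high 64 bits of partial product
            uint64_t t  = zz[i + j] + lo;
            hi += (t < lo);                          // carry out of the low half
            t  += carry;
            hi += (t < carry);                       // carry from the running carry
            zz[i + j] = t;
            carry = hi;                              // carry into the next column
        }
        zz[i + 6] = carry;                           // top word of this row
    }
}

The PTX version would typically replace the inner loop with mad.lo/mad.hi chains, but the column arithmetic is the same.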
#define FP_REDUCE12(Z)
PTX macro for wide reduction: reduces a 12-word (768-bit) value modulo p.
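The macro's algorithm is not shown here, but as a general identity (not a claim about FP_REDUCE12's internals), a 12-word value Z splits into 6-word halves Z = Zhi*2^384 + Zlo, and since 2^384 mod p is a fixed precomputable constant,

    Z ≡ Zhi*(2^384 mod p) + Zlo   (mod p)

so the high half can be folded down with one 6x6-word multiply, leaving a much smaller value to finish off with conditional subtractions like those in the listing above.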