112 x0 = p.
x[0], y0 = p.
y[0], z0 = p.
z[0],
113 x1 = p.
x[1], y1 = p.
y[1], z1 = p.
z[1],
114 x2 = p.
x[2], y2 = p.
y[2], z2 = p.
z[2],
115 x3 = p.
x[3], y3 = p.
y[3], z3 = p.
z[3],
116 x4 = p.
x[4], y4 = p.
y[4], z4 = p.
z[4],
117 x5 = p.
x[5], y5 = p.
y[5], z5 = p.
z[5],
119 u0 = q.
x[0], v0 = q.
y[0], w0 = q.
z[0],
120 u1 = q.
x[1], v1 = q.
y[1], w1 = q.
z[1],
121 u2 = q.
x[2], v2 = q.
y[2], w2 = q.
z[2],
122 u3 = q.
x[3], v3 = q.
y[3], w3 = q.
z[3],
123 u4 = q.
x[4], v4 = q.
y[4], w4 = q.
z[4],
124 u5 = q.
x[5], v5 = q.
y[5], w5 = q.
z[5];
128 "\n\t.reg .u64 X1<10>, X1a, X1b;"
129 "\n\t.reg .u64 X2<10>, X2a, X2b;"
130 "\n\t.reg .u64 Y1<10>, Y1a, Y1b;"
131 "\n\t.reg .u64 Y2<10>, Y2a, Y2b;"
132 "\n\t.reg .u64 Z1<10>, Z1a, Z1b;"
133 "\n\t.reg .u64 Z2<10>, Z2a, Z2b;"
134 "\n\t.reg .u64 t0<6>, t1<6>, t2<6>, t3<6>;"
136 "\n\t.reg .u64 t<6>;"
138 "\n\t.reg .pred ne, gt;"
140 "\n\tmov.u64 X10, %0;"
141 "\n\tmov.u64 X11, %1;"
142 "\n\tmov.u64 X12, %2;"
143 "\n\tmov.u64 X13, %3;"
144 "\n\tmov.u64 X14, %4;"
145 "\n\tmov.u64 X15, %5;"
147 "\n\tmov.u64 Y10, %6;"
148 "\n\tmov.u64 Y11, %7;"
149 "\n\tmov.u64 Y12, %8;"
150 "\n\tmov.u64 Y13, %9;"
151 "\n\tmov.u64 Y14, %10;"
152 "\n\tmov.u64 Y15, %11;"
154 "\n\tmov.u64 Z10, %12;"
155 "\n\tmov.u64 Z11, %13;"
156 "\n\tmov.u64 Z12, %14;"
157 "\n\tmov.u64 Z13, %15;"
158 "\n\tmov.u64 Z14, %16;"
159 "\n\tmov.u64 Z15, %17;"
161 "\n\tmov.u64 X20, %18;"
162 "\n\tmov.u64 X21, %19;"
163 "\n\tmov.u64 X22, %20;"
164 "\n\tmov.u64 X23, %21;"
165 "\n\tmov.u64 X24, %22;"
166 "\n\tmov.u64 X25, %23;"
168 "\n\tmov.u64 Y20, %24;"
169 "\n\tmov.u64 Y21, %25;"
170 "\n\tmov.u64 Y22, %26;"
171 "\n\tmov.u64 Y23, %27;"
172 "\n\tmov.u64 Y24, %28;"
173 "\n\tmov.u64 Y25, %29;"
175 "\n\tmov.u64 Z20, %30;"
176 "\n\tmov.u64 Z21, %31;"
177 "\n\tmov.u64 Z22, %32;"
178 "\n\tmov.u64 Z23, %33;"
179 "\n\tmov.u64 Z24, %34;"
180 "\n\tmov.u64 Z25, %35;"
237 "\n\tmov.u64 %0, X10;"
238 "\n\tmov.u64 %1, X11;"
239 "\n\tmov.u64 %2, X12;"
240 "\n\tmov.u64 %3, X13;"
241 "\n\tmov.u64 %4, X14;"
242 "\n\tmov.u64 %5, X15;"
244 "\n\tmov.u64 %6, Y10;"
245 "\n\tmov.u64 %7, Y11;"
246 "\n\tmov.u64 %8, Y12;"
247 "\n\tmov.u64 %9, Y13;"
248 "\n\tmov.u64 %10, Y14;"
249 "\n\tmov.u64 %11, Y15;"
251 "\n\tmov.u64 %12, Z10;"
252 "\n\tmov.u64 %13, Z11;"
253 "\n\tmov.u64 %14, Z12;"
254 "\n\tmov.u64 %15, Z13;"
255 "\n\tmov.u64 %16, Z14;"
256 "\n\tmov.u64 %17, Z15;"
260 "+l"(x0),
"+l"(x1),
"+l"(x2),
"+l"(x3),
"+l"(x4),
"+l"(x5),
261 "+l"(y0),
"+l"(y1),
"+l"(y2),
"+l"(y3),
"+l"(y4),
"+l"(y5),
262 "+l"(z0),
"+l"(z1),
"+l"(z2),
"+l"(z3),
"+l"(z4),
"+l"(z5)
264 "l"(u0),
"l"(u1),
"l"(u2),
"l"(u3),
"l"(u4),
"l"(u5),
265 "l"(v0),
"l"(v1),
"l"(v2),
"l"(v3),
"l"(v4),
"l"(v5),
266 "l"(w0),
"l"(w1),
"l"(w2),
"l"(w3),
"l"(w4),
"l"(w5)
269 p.
x[0] = x0, p.
x[1] = x1, p.
x[2] = x2, p.
x[3] = x3, p.
x[4] = x4, p.
x[5] = x5;
270 p.
y[0] = y0, p.
y[1] = y1, p.
y[2] = y2, p.
y[3] = y3, p.
y[4] = y4, p.
y[5] = y5;
271 p.
z[0] = z0, p.
z[1] = z1, p.
z[2] = z2, p.
z[3] = z3, p.
z[4] = z4, p.
z[5] = z5;
__device__ __host__ void fp_zero(fp_t &z)
Sets z to zero.
__device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y)
Computes the sum of two residues x and y modulo p and stores it in z. Device only function.
__device__ void fp_x12(fp_t &z, const fp_t &x)
Multiplies the residue x modulo p by 12 and stores the result into z.
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
__device__ void fp_mul(fp_t &z, const fp_t &x, const fp_t &y)
Multiplies two Fp residues x and y, stores in z.
__device__ __host__ void fp_cpy(fp_t &z, const fp_t &x)
Copy from x into z.
__device__ void fp_x3(fp_t &z, const fp_t &x)
Multiplies x by 3 and stores the result into z.
__device__ void fp_sub(fp_t &z, const fp_t &x, const fp_t &y)
Calculates the difference of two residues modulo p and stores it into z.
#define FP_ADD(Z, X, Y)
PTX macro for addition of two residues modulo p. Z←X+Y.
#define FP_MUL(Z, X, Y)
PTX macro for multiplication of two residues mod p. Reads X0..X5 and Y0..Y5. Writes Z0....
#define FP_REDUCE12(Z)
Wide reduction over 12 words.
#define FP_SUB(Z, X, Y)
PTX macro for calculating the difference of two residues modulo p, Z = X-Y.
#define FP_X12(Z, X)
PTX macro for multiplication by 12. Stores in Z.
#define FP_X3(Z, X)
PTX macro for multiplication by 3. Stores in Z.
__device__ bool g1p_isPoint(const g1p_t &p)
Check if the value stored in p is a valid point on the G1 curve.
__device__ void g1p_add(g1p_t &p, const g1p_t &q)
Computes the sum of two points p and q, using projective coordinates, and stores the result in p.
G1 point in projective coordinates.