26 g1p_print(
"ERROR in g1p_dbl(): Invalid point ", p);
38 x0 = p.
x[0], x1 = p.
x[1], x2 = p.
x[2], x3 = p.
x[3], x4 = p.
x[4], x5 = p.
x[5],
39 y0 = p.
y[0], y1 = p.
y[1], y2 = p.
y[2], y3 = p.
y[3], y4 = p.
y[4], y5 = p.
y[5],
40 z0 = p.
z[0], z1 = p.
z[1], z2 = p.
z[2], z3 = p.
z[3], z4 = p.
z[4], z5 = p.
z[5];
44 "\n\t.reg .u64 v<6>, w<6>, x<6>, y<6>, z<6>;"
46 "\n\t.reg .u64 u<10>, ua, ub;"
50 "\n\t.reg .pred nz, gt;"
63 "\n\tmov.u64 y4, %10;"
64 "\n\tmov.u64 y5, %11;"
66 "\n\tmov.u64 z0, %12;"
67 "\n\tmov.u64 z1, %13;"
68 "\n\tmov.u64 z2, %14;"
69 "\n\tmov.u64 z3, %15;"
70 "\n\tmov.u64 z4, %16;"
71 "\n\tmov.u64 z5, %17;"
100 "\n\tmov.u64 z2, u2;"
101 "\n\tmov.u64 z3, u3;"
102 "\n\tmov.u64 z4, u4;"
103 "\n\tmov.u64 z5, u5;"
108 "\n\tmov.u64 y0, u0;"
109 "\n\tmov.u64 y1, u1;"
110 "\n\tmov.u64 y2, u2;"
111 "\n\tmov.u64 y3, u3;"
112 "\n\tmov.u64 y4, u4;"
113 "\n\tmov.u64 y5, u5;"
122 "\n\tmov.u64 x0, u0;"
123 "\n\tmov.u64 x1, u1;"
124 "\n\tmov.u64 x2, u2;"
125 "\n\tmov.u64 x3, u3;"
126 "\n\tmov.u64 x4, u4;"
127 "\n\tmov.u64 x5, u5;"
134 "\n\tmov.u64 w0, u0;"
135 "\n\tmov.u64 w1, u1;"
136 "\n\tmov.u64 w2, u2;"
137 "\n\tmov.u64 w3, u3;"
138 "\n\tmov.u64 w4, u4;"
139 "\n\tmov.u64 w5, u5;"
150 "\n\tmov.u64 z0, u0;"
151 "\n\tmov.u64 z1, u1;"
152 "\n\tmov.u64 z2, u2;"
153 "\n\tmov.u64 z3, u3;"
154 "\n\tmov.u64 z4, u4;"
155 "\n\tmov.u64 z5, u5;"
160 "\n\tmov.u64 y0, u0;"
161 "\n\tmov.u64 y1, u1;"
162 "\n\tmov.u64 y2, u2;"
163 "\n\tmov.u64 y3, u3;"
164 "\n\tmov.u64 y4, u4;"
165 "\n\tmov.u64 y5, u5;"
169 "\n\tmov.u64 %0, x0;"
170 "\n\tmov.u64 %1, x1;"
171 "\n\tmov.u64 %2, x2;"
172 "\n\tmov.u64 %3, x3;"
173 "\n\tmov.u64 %4, x4;"
174 "\n\tmov.u64 %5, x5;"
176 "\n\tmov.u64 %6, y0;"
177 "\n\tmov.u64 %7, y1;"
178 "\n\tmov.u64 %8, y2;"
179 "\n\tmov.u64 %9, y3;"
180 "\n\tmov.u64 %10, y4;"
181 "\n\tmov.u64 %11, y5;"
183 "\n\tmov.u64 %12, z0;"
184 "\n\tmov.u64 %13, z1;"
185 "\n\tmov.u64 %14, z2;"
186 "\n\tmov.u64 %15, z3;"
187 "\n\tmov.u64 %16, z4;"
188 "\n\tmov.u64 %17, z5;"
192 "+l"(x0),
"+l"(x1),
"+l"(x2),
"+l"(x3),
"+l"(x4),
"+l"(x5),
193 "+l"(y0),
"+l"(y1),
"+l"(y2),
"+l"(y3),
"+l"(y4),
"+l"(y5),
194 "+l"(z0),
"+l"(z1),
"+l"(z2),
"+l"(z3),
"+l"(z4),
"+l"(z5)
197 p.
x[0] = x0; p.
x[1] = x1; p.
x[2] = x2; p.
x[3] = x3; p.
x[4] = x4; p.
x[5] = x5;
198 p.
y[0] = y0; p.
y[1] = y1; p.
y[2] = y2; p.
y[3] = y3; p.
y[4] = y4; p.
y[5] = y5;
199 p.
z[0] = z0; p.
z[1] = z1; p.
z[2] = z2; p.
z[3] = z3; p.
z[4] = z4; p.
z[5] = z5;
__device__ __host__ void fp_zero(fp_t &z)
Sets z to zero.
__device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y)
Computes the sum of two residues x and y modulo p and stores it in z. Device-only function.
__device__ void fp_x8(fp_t &z, const fp_t &x)
Multiplies x by 8 and stores the result into z.
__device__ void fp_x2(fp_t &z, const fp_t &x)
Multiplies x by 2 and stores the result into z.
__device__ void fp_x12(fp_t &z, const fp_t &x)
Multiplies the residue x (mod p) by 12 and stores the result into z.
__device__ void fp_sqr(fp_t &z, const fp_t &x)
Computes the square of the residue x modulo p and stores it in z.
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
__device__ void fp_mul(fp_t &z, const fp_t &x, const fp_t &y)
Multiplies two Fp residues x and y, stores in z.
__device__ __host__ void fp_cpy(fp_t &z, const fp_t &x)
Copy from x into z.
__device__ void fp_x3(fp_t &z, const fp_t &x)
Multiplies x by 3 and stores the result into z.
__device__ void fp_sub(fp_t &z, const fp_t &x, const fp_t &y)
Calculates the difference of two residues modulo p and stores it into z.
#define FP_ADD(Z, X, Y)
PTX macro for addition of two residues modulo p. Z←X+Y.
#define FP_MUL(Z, X, Y)
PTX macro for multiplication of two residues mod p. Reads X0..X5 and Y0..Y5. Writes Z0....
#define FP_REDUCE12(Z)
Wide reduction over 12 words.
#define FP_SQR(Z, X)
PTX macro for computing the square of the residue x modulo p. Stores in z.
#define FP_SUB(Z, X, Y)
PTX macro for calculating the difference of two residues modulo p, Z = X-Y.
#define FP_X12(Z, X)
PTX macro for multiplication by 12. Stores in Z.
#define FP_X2(Z, X)
PTX macro for multiplication by 2. Stores in Z.
#define FP_X3(Z, X)
PTX macro for multiplication by 3. Stores in Z.
#define FP_X8(Z, X)
PTX macro for multiplication by 8. Stores in Z.
__device__ bool g1p_isPoint(const g1p_t &p)
Check if the value stored in p is a valid point on the G1 curve.
__device__ __host__ void g1p_print(const char *s, const g1p_t &p)
Print a standard representation of p, preceded by the user-set string s.
__device__ void g1p_dbl(g1p_t &p)
G1 point doubling, with write back: p=2*p.
G1 point in projective coordinates.