9 static __device__
fr_t fr_tmp[512*512];
27 if (gridDim.y != 1)
return;
28 if (gridDim.z != 1)
return;
29 if (blockDim.x != 256)
return;
30 if (blockDim.y != 1)
return;
31 if (blockDim.z != 1)
return;
33 unsigned tid = threadIdx.x;
34 unsigned bid = blockIdx.x;
47 fr_t *fr = fr_tmp + 512 * bid;
51 for (
int i=0; i<16; i++) {
55 unsigned src = 16*tid + 15 - i;
56 unsigned dst = (tid+257) % 512;
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t hext_fft[512 *512]
__managed__ fr_t polynomial[512 *4096]
__global__ void fk20_poly2hext_fft(g1p_t *hext_fft, const fr_t *polynomial, const g1p_t xext_fft[8192])
Computes hext_fft from polynomial and xext_fft (polynomial + xext_fft -> hext_fft).
__device__ __host__ void fr_zero(fr_t &z)
Sets the value of z to zero.
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)…
__device__ __host__ void fr_cpy(fr_t &z, const fr_t &x)
Copy from x into z.
__device__ void fr_fft(fr_t *output, const fr_t *input)
FFT over Fr.
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
__device__ void g1p_add(g1p_t &p, const g1p_t &q)
Computes the sum of the two points p and q, using projective coordinates, and stores the result in p.
__device__ void g1p_mul(g1p_t &p, const fr_t &x)
p ← x·p: point multiplication by the scalar x, in projective coordinates. The result is stored back into p.
__device__ __host__ void g1p_cpy(g1p_t &p, const g1p_t &q)
Copy from q into p.
G1 point in projective coordinates.