15 static __managed__
fr_t fr[
ROWS * 16 * 512];
28 unsigned tid = threadIdx.x;
29 unsigned bid = blockIdx.x;
57 fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr,
polynomial);
61 fr_fft_wrapper<<<rows * 16, 256, fr_sharedmem>>>(fr, fr);
65 fk20_msm<<<rows, 256>>>(g1p, fr,
xext_fft);
69 g1p_ift_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p, g1p);
73 fk20_hext2h<<<rows, 256>>>(g1p);
77 g1p_fft_wrapper<<<rows, 256, g1p_sharedmem>>>(
h_fft, g1p);
const size_t g1p_sharedmem
#define SET_SHAREDMEM(SZ, FN)
const size_t fr_sharedmem
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t h_fft[512 *512]
__managed__ fr_t polynomial[512 *4096]
__managed__ g1p_t h[512 *512]
__host__ void fk20_poly2h_fft(g1p_t *h_fft, const fr_t *polynomial, const g1p_t xext_fft[8192], unsigned rows)
polynomial + xext_fft -> h_fft. This function is a wrapper for the full FK20 computation,...
__global__ void fk20_hext2h(g1p_t *h)
hext -> h: Fill the upper half of hext with inf, modifying it in place.
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
__global__ void fr_fft_wrapper(fr_t *output, const fr_t *input)
wrapper for fr_fft: FFT for fr_t[512]
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
__global__ void g1p_fft_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_fft: FFT for arrays of g1p_t with length 512
__global__ void g1p_ift_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_ift: inverse FFT for arrays of g1p_t with length 512
G1 point in projective coordinates.