/**
 * Block until the device has finished all outstanding work and report a
 * failure, if any, on stdout.
 *
 * The result of cudaDeviceSynchronize() is stored in a variable named `err`
 * that must already exist in the invoking scope (it has to be assignable from
 * the return value of cudaDeviceSynchronize() and comparable to cudaSuccess).
 * `err` is left set after the macro, so the caller can inspect it.
 *
 * @param fmt  printf-style format string literal; it is pasted between the
 *             "file:line" prefix and the " Error: ..." suffix, so it must be
 *             a string literal, not a pointer variable.
 * @param ...  Optional arguments consumed by the conversions in `fmt`.
 *
 * The variadic arguments are passed *before* `err` and the error name so that
 * they line up with the conversions of `fmt`, which appear in the combined
 * format string before " Error: %d (%s)".
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely under an unbraced if/else.
 * Invoke it with a trailing semicolon: CUDASYNC("my_kernel");
 */
#define CUDASYNC(fmt, ...) \
    do { \
        err = cudaDeviceSynchronize(); \
        if (err != cudaSuccess) \
            printf("%s:%d " fmt " Error: %d (%s)\n", __FILE__, __LINE__, ##__VA_ARGS__, err, cudaGetErrorName(err)); \
    } while (0)
/**
 * Set the cudaFuncAttributeMaxDynamicSharedMemorySize attribute of a kernel
 * and report a failure, if any, on stdout.
 *
 * The result of cudaFuncSetAttribute() is stored in a variable named `err`
 * that must already exist in the invoking scope; it is left set after the
 * macro so the caller can inspect it. The return value of the subsequent
 * cudaDeviceSynchronize() call is not checked.
 *
 * @param SZ  Value passed as the attribute value (dynamic shared memory size).
 * @param FN  Function whose attribute is set.
 *
 * The expansion is a single brace-enclosed compound statement, so all three
 * steps stay together when the macro is used under an unbraced `if`. The
 * macro is self-terminating: a trailing semicolon at the call site is
 * optional, which keeps call sites written either way compiling. Because of
 * that, do not write `SET_SHAREDMEM(...); else` after an unbraced `if`.
 * Both arguments are parenthesised so that expression arguments expand with
 * the intended precedence.
 */
#define SET_SHAREDMEM(SZ, FN) \
    { \
        err = cudaFuncSetAttribute((FN), cudaFuncAttributeMaxDynamicSharedMemorySize, (SZ)); \
        cudaDeviceSynchronize(); \
        if (err != cudaSuccess) \
            printf("Error cudaFuncSetAttribute: %s:%d, error %d (%s)\n", __FILE__, __LINE__, err, cudaGetErrorName(err)); \
    }
__global__ void fk20_hext_fft2h(g1p_t *h, const g1p_t *hext_fft)
const size_t g1p_sharedmem
__global__ void fk20_msm(g1p_t *hext_fft, const fr_t *toeplitz_coefficients_fft, const g1p_t *xext_fft)
toeplitz_coefficients_fft + xext_fft -> hext_fft
__global__ void fk20_poly2toeplitz_coefficients(fr_t *toeplitz_coefficients, const fr_t *polynomial)
polynomial -> toeplitz_coefficients
__host__ void fk20_poly2h_fft(g1p_t *h_fft, const fr_t *polynomial, const g1p_t xext_fft[8192], unsigned rows)
polynomial + xext_fft -> h_fft. This function is a wrapper for the full FK20 computation,...
__global__ void fk20_poly2hext_fft(g1p_t *hext_fft, const fr_t *polynomial, const g1p_t xext_fft[8192])
polynomial + xext_fft -> hext_fft
__global__ void fk20_poly2toeplitz_coefficients_fft(fr_t *toeplitz_coefficients_fft, const fr_t *polynomial)
polynomial -> toeplitz_coefficients_fft
__global__ void fk20_hext_fft2h_fft(g1p_t *h_fft, const g1p_t *hext_fft)
hext_fft -> h_fft
__global__ void fk20_setup2xext_fft(g1p_t xext_fft[8192], const g1p_t *setup)
const size_t fr_sharedmem
__global__ void fk20_h2h_fft(g1p_t *h_fft, const g1p_t *h)
h -> h_fft
__global__ void fk20_hext2h(g1p_t *h)
hext -> h. Fills the upper half of hext with inf, modifying it in place.
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t hext_fft[512 *512]
__managed__ g1p_t h_fft[512 *512]
__managed__ fr_t toeplitz_coefficients[512 *16][512]
__managed__ fr_t polynomial[512 *4096]
__managed__ fr_t toeplitz_coefficients_fft[512 *16][512]
__managed__ g1p_t h[512 *512]
__managed__ g1p_t setup[4097]
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
G1 point in projective coordinates.