11 __managed__ uint8_t
cmp[512];
19 const size_t sharedmem = 512*4*8;
24 err = cudaFuncSetAttribute(
fr_fft_wrapper, cudaFuncAttributeMaxDynamicSharedMemorySize, sharedmem);
25 cudaDeviceSynchronize();
26 if (err != cudaSuccess) printf(
"Error cudaFuncSetAttribute: %d (%s)\n", err, cudaGetErrorName(err));
28 err = cudaFuncSetAttribute(
fr_ift_wrapper, cudaFuncAttributeMaxDynamicSharedMemorySize, sharedmem);
29 cudaDeviceSynchronize();
30 if (err != cudaSuccess) printf(
"Error cudaFuncSetAttribute: %d (%s)\n", err, cudaGetErrorName(err));
34 printf(
"=== RUN %s\n",
"fr_fft");
35 fr_fft_wrapper<<<1, 256, sharedmem>>>(
fft, q);
37 err = cudaDeviceSynchronize();
38 if (err != cudaSuccess) printf(
"Error fr_fft_wrapper: %d (%s)\n", err, cudaGetErrorName(err));
42 for (
int i=0; i<512; i++)
45 fr_eq_wrapper<<<16, 32>>>(
cmp, 512,
fft, a);
47 err = cudaDeviceSynchronize();
48 if (err != cudaSuccess) printf(
"Error fr_eq_wrapper: %d (%s)\n", err, cudaGetErrorName(err));
52 for (
int i=0; pass && i<512; i++)
54 printf(
"FFT error %d\n", i);
62 printf(
"=== RUN %s\n",
"fr_ift");
63 fr_ift_wrapper<<<1, 256, sharedmem>>>(
fft, a);
65 err = cudaDeviceSynchronize();
66 if (err != cudaSuccess) printf(
"Error fr_ift_wrapper: %d (%s)\n", err, cudaGetErrorName(err));
70 for (
int i=0; i<512; i++)
73 fr_eq_wrapper<<<16, 32>>>(
cmp, 512,
fft, q);
75 err = cudaDeviceSynchronize();
76 if (err != cudaSuccess) printf(
"Error fr_eq_wrapper: %d (%s)\n", err, cudaGetErrorName(err));
80 for (
int i=0; pass && i<512; i++)
82 printf(
"IFT error %d\n", i);
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a little-endian array of four uint64_t). ...
__global__ void fr_fft_wrapper(fr_t *output, const fr_t *input)
Wrapper kernel for fr_fft: computes the FFT of an fr_t[512] array.
__global__ void fr_ift_wrapper(fr_t *output, const fr_t *input)
Wrapper kernel for fr_ift: computes the inverse FFT of an fr_t[512] array.
__managed__ uint8_t cmp[512]
void FrTestFFT()
Tests the FFT and inverse FFT over Fr using known-answer tests (KAT).
__managed__ fr_t fft[512]