15 static __managed__ uint8_t
cmp[16*512];
16 static __managed__
fr_t fr_tmp[16*512];
25 printf(
">>>> Poly Tests\n");
58 printf(
"\n>>>>Full integration test\n"); fflush(stdout);
59 printf(
"polynomial -> tc\n"); fflush(stdout);
62 fk20_poly2toeplitz_coefficients<<<rows, 256>>>(fr_tmp,
polynomial);
63 CUDASYNC(
"fk20_poly2toeplitz_coefficients");
74 printf(
"tc -> tc_fft\n"); fflush(stdout);
77 for(
int i=0; i<16; i++){
78 fr_fft_wrapper<<<rows, 256, fr_sharedmem>>>(fr_tmp+512*i, fr_tmp+512*i);
90 printf(
"tc_fft -> hext_fft\n"); fflush(stdout);
103 printf(
"hext_fft -> hext -> h\n"); fflush(stdout);
108 fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
119 printf(
"h -> h_fft\n"); fflush(stdout);
154 printf(
"\n>>>>Full integration test\n"); fflush(stdout);
158 printf(
"polynomial -> tc\n"); fflush(stdout);
161 fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr_tmp,
polynomial);
162 CUDASYNC(
"fk20_poly2toeplitz_coefficients");
173 printf(
"tc -> tc_fft\n"); fflush(stdout);
176 for(
int i=0; i<16; i++){
177 fr_fft_wrapper<<<rows, 256, fr_sharedmem>>>(fr_tmp+512*i, fr_tmp+512*i);
190 printf(
"tc_fft -> hext_fft\n"); fflush(stdout);
205 printf(
"hext_fft -> hext -> h\n"); fflush(stdout);
210 fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
222 printf(
"h -> h_fft\n"); fflush(stdout);
256 printf(
"=== RUN %s\n",
"fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients");
257 memset(fr_tmp, 0xAA,16*512*
sizeof(
fr_t));
258 for(
int testIDX=0; testIDX<=1; testIDX++){
261 fk20_poly2toeplitz_coefficients<<<1, 256 >>>(fr_tmp, polynomial_l);
264 CUDASYNC(
"fk20_poly2toeplitz_coefficients");
268 fr_eq_wrapper<<<256, 32>>>(
cmp, 16*512, fr_tmp, (
fr_t *)toeplitz_coefficients_l);
299 printf(
"=== RUN %s\n",
"fk20_poly2hext_fft: polynomial -> hext_fft");
301 for(
int testIDX=0; testIDX<=1; testIDX++){
304 fk20_poly2hext_fft<<<1, 256, fr_sharedmem>>>(
g1p_tmp, polynomial_l, (
const g1p_t *)xext_fft_l);
337 printf(
"=== RUN %s\n",
"fk20_poly2h_fft: polynomial -> h_fft (full computation)");
340 memset(fr_tmp,0xAA,8192*
sizeof(
fr_t));
341 for(
int testIDX=0; testIDX<=1; testIDX++){
373 g1p_t xext_fft_l[16][512]){
378 printf(
"=== RUN %s\n",
"fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft");
380 for(
int testIDX=0; testIDX<=1; testIDX++){
383 fk20_msm<<<1, 256>>>(
g1p_tmp, (
const fr_t*)toeplitz_coefficients_fft_l, (
const g1p_t*)xext_fft_l);
const size_t g1p_sharedmem
__host__ void fk20_poly2h_fft(g1p_t *h_fft, const fr_t *polynomial, const g1p_t xext_fft[8192], unsigned rows)
polynomial + xext_fft -> h_fft. This function is a wrapper for the full FK20 computation,...
__global__ void fk20_poly2hext_fft(g1p_t *hext_fft, const fr_t *polynomial, const g1p_t xext_fft[8192])
polynomial + xext_fft -> hext_fft
#define SET_SHAREDMEM(SZ, FN)
#define CUDASYNC(fmt,...)
const size_t fr_sharedmem
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t hext_fft[512 *512]
__managed__ g1p_t h_fft[512 *512]
__managed__ fr_t toeplitz_coefficients[512 *16][512]
__managed__ fr_t polynomial[512 *4096]
__managed__ fr_t toeplitz_coefficients_fft[512 *16][512]
void varMangle(fr_t *target, size_t size, unsigned step)
Swap elements at positions that are multiples of step. Non-destructive: call it a second time to undo the changes.
__managed__ g1p_t h[512 *512]
__managed__ uint8_t cmp[16 *512]
void fk20_msmloop(g1p_t hext_fft_l[512], fr_t toeplitz_coefficients_fft_l[16][512], g1p_t xext_fft_l[16][512])
Test for fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft.
void fk20_poly2hext_fft_test(fr_t polynomial_l[4096], g1p_t xext_fft_l[16][512], g1p_t hext_fft_l[512])
Test for fk20_poly2hext_fft: polynomial -> hext_fft.
void fk20_poly2h_fft_test(fr_t polynomial_l[4096], g1p_t xext_fft_l[16][512], g1p_t h_fft_l[512])
Test for fk20_poly2h_fft: polynomial -> h_fft.
void fullTestFalsifiability()
void fk20_poly2toeplitz_coefficients_test(fr_t polynomial_l[4096], fr_t toeplitz_coefficients_l[16][512])
Test for fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients.
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
__global__ void fr_fft_wrapper(fr_t *output, const fr_t *input)
wrapper for fr_fft: FFT for arrays of fr_t with length 512
__global__ void g1p_fft_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_fft: FFT for arrays of g1p_t with length 512
__global__ void g1p_ift_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_ift: inverse FFT for arrays of g1p_t with length 512
__shared__ g1p_t g1p_tmp[]
G1 point in projective coordinates.
#define NEGCMPCHECK(LENGTH)
#define NEGPRINTPASS(pass)