23 extern __managed__
g1p_t h[512*512];
31 static __managed__ uint8_t cmp[512*16*512];
32 static __managed__
fr_t fr_tmp_[512*16*512];
35 #define PatternOnWorkspaceMemory
36 #ifdef PatternOnWorkspaceMemory
37 #define PTRN_G1PTMP memset(g1p_tmp, 0x88, 512*512*sizeof(g1p_t));
38 #define PTRN_FRTMP memset(fr_tmp_, 0x88, 512*16*512*sizeof(fr_t));
75 int main(
int argc,
char **argv) {
89 printf(
"=== RUN test with %d rows\n\n", rows);
146 printf(
"\n>>>>Full integration test\n"); fflush(stdout);
147 printf(
"polynomial -> tc\n"); fflush(stdout);
150 fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr_tmp_,
polynomial);
151 CUDASYNC(
"fk20_poly2toeplitz_coefficients");
157 for (
int i=0; i<16*512; i++)
159 printf(
"poly2tc error %04x\n", i);
166 printf(
"tc -> tc_fft\n"); fflush(stdout);
169 fr_fft_wrapper<<<rows*16, 256, fr_sharedmem>>>(fr_tmp_, fr_tmp_);
181 printf(
"tc_fft -> hext_fft\n"); fflush(stdout);
196 printf(
"hext_fft -> hext -> h\n"); fflush(stdout);
201 fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
206 g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512,
g1p_tmp, (
g1p_t *)
h);
213 printf(
"h -> h_fft\n"); fflush(stdout);
221 g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512,
g1p_tmp,
h_fft);
250 printf(
"\n>>>>Full integration test - Falsifiability\n"); fflush(stdout);
251 printf(
"polynomial -> tc\n"); fflush(stdout);
254 fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr_tmp_,
polynomial);
255 CUDASYNC(
"fk20_poly2toeplitz_coefficients");
266 printf(
"tc -> tc_fft\n"); fflush(stdout);
269 fr_fft_wrapper<<<rows*16, 256, fr_sharedmem>>>(fr_tmp_, fr_tmp_);
281 printf(
"tc_fft -> hext_fft\n"); fflush(stdout);
296 printf(
"hext_fft -> hext -> h\n"); fflush(stdout);
301 fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
306 g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512,
g1p_tmp, (
g1p_t *)
h);
313 printf(
"h -> h_fft\n"); fflush(stdout);
321 g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512,
g1p_tmp,
h_fft);
344 printf(
"=== RUN %s\n",
"fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft");
345 for(
int testIDX=0; testIDX<=1; testIDX++){
384 printf(
"=== RUN %s\n",
"g1p_fft: h -> h_fft");
385 for(
int testIDX=0; testIDX<=1; testIDX++){
388 g1p_fft_wrapper<<<rows, 256, g1p_sharedmem>>>(
g1p_tmp,
h);
393 g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512,
g1p_tmp,
h_fft);
424 printf(
"=== RUN %s\n",
"g1p_ift: h_fft -> h");
426 for(
int testIDX=0; testIDX<=1; testIDX++){
429 g1p_ift_wrapper<<<rows, 256, g1p_sharedmem>>>(
g1p_tmp,
h_fft);
434 g1p_eq_wrapper<<<16, 32>>>(cmp, 512*512,
g1p_tmp,
h);
465 printf(
"=== RUN %s\n",
"g1p_ift: hext_fft -> h");
466 for(
int testIDX=0; testIDX<=1; testIDX++){
471 fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
476 g1p_eq_wrapper<<<8, 32>>>(cmp, rows*512,
g1p_tmp,
h);
503 printf(
"=== RUN %s\n",
"fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients");
504 for(
int testIDX=0; testIDX<=1; testIDX++){
507 fk20_poly2toeplitz_coefficients<<<rows, 256 >>>(fr_tmp_,
polynomial);
510 CUDASYNC(
"fk20_poly2toeplitz_coefficients");
545 printf(
"=== RUN %s\n",
"fk20_poly2hext_fft: polynomial -> hext_fft");
546 for(
int testIDX=0; testIDX<=1; testIDX++){
581 printf(
"=== RUN %s\n",
"fk20_poly2h_fft: polynomial -> h_fft");
583 for(
int testIDX=0; testIDX<=1; testIDX++){
620 printf(
"=== RUN %s\n",
"hext_fft2h_fft_512: hext_fft -> h_fft");
621 for(
int testIDX=0; testIDX<=1; testIDX++){
629 g1p_eq_wrapper<<<8, 32>>>(cmp, rows*512,
g1p_tmp,
h);
655 printf(
"=== RUN %s\n",
"fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft");
656 for(
int testIDX=0; testIDX<=1; testIDX++){
684 void fk20_poly2toeplitz_coefficients_fft_test(
unsigned rows){
691 printf(
"=== RUN %s\n",
"fk20_poly2toeplitz_coefficients_fft: polynomial -> toeplitz_coefficients_fft");
692 memset(fr_tmp_, 0xdeadbeef,512*16*512*
sizeof(
fr_t));
694 fk20_poly2toeplitz_coefficients_fft<<<rows, 256>>>(fr_tmp_,
polynomial);
695 err = cudaDeviceSynchronize();
696 CUDASYNC(
"fk20_poly2toeplitz_coefficients_fft");
720 #define START_INDEX 3
732 if (target == NULL || size <= 0 || step <= 0)
736 if (i + step < size){
737 memcpy(tmp, target+i,
sizeof(
fr_t));
738 memcpy(target+i, target+i+1,
sizeof(
fr_t));
739 memcpy(target+i+1, tmp,
sizeof(
fr_t));
754 if (target == NULL || size <= 0 || step <= 0)
758 if (i + step < size) {
759 memcpy(&tmp, target+i,
sizeof(
g1p_t));
760 memcpy(target+i, target+i+1,
sizeof(
g1p_t));
761 memcpy(target+i+1, &tmp,
sizeof(
g1p_t));
const size_t g1p_sharedmem
__host__ void fk20_poly2h_fft(g1p_t *h_fft, const fr_t *polynomial, const g1p_t xext_fft[8192], unsigned rows)
polynomial + xext_fft -> h_fft This function is a wrapper for the full FK20 computation,...
__global__ void fk20_poly2hext_fft(g1p_t *hext_fft, const fr_t *polynomial, const g1p_t xext_fft[8192])
polynomial + xext_fft -> hext_fft
#define SET_SHAREDMEM(SZ, FN)
#define CUDASYNC(fmt,...)
__global__ void fk20_hext_fft2h_fft(g1p_t *h_fft, const g1p_t *hext_fft)
hext_fft -> h_fft
const size_t fr_sharedmem
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t hext_fft[512 *512]
void fk20_poly2toeplitz_coefficients_512(unsigned rows)
Test for fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients.
void fk20_poly2h_fft_512(unsigned rows)
Test for fk20_poly2h_fft: polynomial -> h_fft.
__managed__ g1p_t h_fft[512 *512]
int main(int argc, char **argv)
Executes a many-row tests on FK20. Behavior is similar to fk20test.cu but using many GPU blocks,...
void fullTest_512(unsigned rows)
Executes many FK20 computations on a single row, with a check on each step. A computation failure wil...
void fullTestFalseability_512(unsigned rows)
Similar to fullTest, but polynomial is has changes done to it. The function checks for false-positive...
void h2h_fft_512(unsigned rows)
Test for g1p_fft: h -> h_fft".
__managed__ fr_t toeplitz_coefficients[512 *16][512]
void fk20_msmloop_512(unsigned rows)
Test for fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft.
__managed__ fr_t polynomial[512 *4096]
void hext_fft2h_512(unsigned rows)
Test for g1p_ift: hext_fft -> h.
__managed__ fr_t toeplitz_coefficients_fft[512 *16][512]
void varMangle(fr_t *target, size_t size, unsigned step)
swap elements at positions multiple of step. Nondestructive, call a second time to undo the changes
void fk20_poly2hext_fft_512(unsigned rows)
Test for fk20_poly2hext_fft: polynomial -> hext_fft.
__managed__ g1p_t h[512 *512]
void toeplitz_coefficients2toeplitz_coefficients_fft_512(unsigned rows)
Test for fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft.
void hext_fft2h_fft_512(unsigned rows)
Test for hext_fft2h_fft_512: hext_fft -> h_fft.
void h_fft2h_512(unsigned rows)
Test for g1p_ift: h_fft -> h.
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
__global__ void fr_fft_wrapper(fr_t *output, const fr_t *input)
wrapper for fr_fft: FFT for fr_t[512]
__global__ void g1p_fft_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_fft: FFT for arrays of g1p_t with length 512
__global__ void g1p_ift_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_ift: inverse FFT for arrays of g1p_t with length 512
__shared__ g1p_t g1p_tmp[]
G1 point in projective coordinates.
void testinit()
Sets a global variable to true if the STDOUT is a terminal. Needs to be done like so because while a ...
#define NEGCMPCHECK(LENGTH)
#define NEGPRINTPASS(pass)