23 extern __managed__ 
g1p_t h[512*512];
 
   31 static __managed__ uint8_t cmp[512*16*512];
 
   32 static __managed__ 
fr_t fr_tmp_[512*16*512];
 
   35 #define PatternOnWorkspaceMemory 
   36 #ifdef PatternOnWorkspaceMemory 
   37     #define PTRN_G1PTMP memset(g1p_tmp, 0x88, 512*512*sizeof(g1p_t)); 
   38     #define PTRN_FRTMP  memset(fr_tmp_, 0x88, 512*16*512*sizeof(fr_t)); 
   75 int main(
int argc, 
char **argv) {
 
   89     printf(
"=== RUN test with %d rows\n\n", rows);
 
  146     printf(
"\n>>>>Full integration test\n"); fflush(stdout);
 
  147     printf(
"polynomial -> tc\n"); fflush(stdout);
 
  150     fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr_tmp_, 
polynomial);
 
  151     CUDASYNC(
"fk20_poly2toeplitz_coefficients");
 
  157     for (
int i=0; i<16*512; i++)
 
  159             printf(
"poly2tc error %04x\n", i);
 
  166     printf(
"tc -> tc_fft\n"); fflush(stdout);
 
  169     fr_fft_wrapper<<<rows*16, 256, fr_sharedmem>>>(fr_tmp_, fr_tmp_);  
 
  181     printf(
"tc_fft -> hext_fft\n"); fflush(stdout);
 
  196     printf(
"hext_fft -> hext -> h\n"); fflush(stdout);
 
  201     fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
 
  206     g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, 
g1p_tmp, (
g1p_t *)
h);
 
  213     printf(
"h -> h_fft\n"); fflush(stdout);
 
  221     g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, 
g1p_tmp, 
h_fft);
 
  250     printf(
"\n>>>>Full integration test - Falsifiability\n"); fflush(stdout);
 
  251     printf(
"polynomial -> tc\n"); fflush(stdout);
 
  254     fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr_tmp_, 
polynomial);
 
  255     CUDASYNC(
"fk20_poly2toeplitz_coefficients");
 
  266     printf(
"tc -> tc_fft\n"); fflush(stdout);
 
  269     fr_fft_wrapper<<<rows*16, 256, fr_sharedmem>>>(fr_tmp_, fr_tmp_);  
 
  281     printf(
"tc_fft -> hext_fft\n"); fflush(stdout);
 
  296     printf(
"hext_fft -> hext -> h\n"); fflush(stdout);
 
  301     fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
 
  306     g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, 
g1p_tmp, (
g1p_t *)
h);
 
  313     printf(
"h -> h_fft\n"); fflush(stdout);
 
  321     g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, 
g1p_tmp, 
h_fft);
 
  344     printf(
"=== RUN   %s\n", 
"fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft");
 
  345     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  384     printf(
"=== RUN   %s\n", 
"g1p_fft: h -> h_fft");
 
  385     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  388         g1p_fft_wrapper<<<rows, 256, g1p_sharedmem>>>(
g1p_tmp, 
h);
 
  393         g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, 
g1p_tmp, 
h_fft);
 
  424     printf(
"=== RUN   %s\n", 
"g1p_ift: h_fft -> h");
 
  426     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  429         g1p_ift_wrapper<<<rows, 256, g1p_sharedmem>>>(
g1p_tmp, 
h_fft);
 
  434         g1p_eq_wrapper<<<16, 32>>>(cmp, 512*512, 
g1p_tmp, 
h);
 
  465     printf(
"=== RUN   %s\n", 
"g1p_ift: hext_fft -> h");
 
  466     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  471         fk20_hext2h<<<rows, 256>>>(
g1p_tmp);
 
  476         g1p_eq_wrapper<<<8, 32>>>(cmp, rows*512, 
g1p_tmp, 
h);
 
  503     printf(
"=== RUN   %s\n", 
"fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients");
 
  504     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  507         fk20_poly2toeplitz_coefficients<<<rows, 256 >>>(fr_tmp_, 
polynomial);
 
  510         CUDASYNC(
"fk20_poly2toeplitz_coefficients");
 
  545     printf(
"=== RUN   %s\n", 
"fk20_poly2hext_fft: polynomial -> hext_fft");
 
  546     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  581     printf(
"=== RUN   %s\n", 
"fk20_poly2h_fft: polynomial -> h_fft");
 
  583     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  620     printf(
"=== RUN   %s\n", 
"hext_fft2h_fft_512: hext_fft -> h_fft");
 
  621     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  629         g1p_eq_wrapper<<<8, 32>>>(cmp, rows*512, 
g1p_tmp, 
h);
 
  655     printf(
"=== RUN   %s\n", 
"fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft");
 
  656     for(
int testIDX=0; testIDX<=1; testIDX++){
 
  684     void fk20_poly2toeplitz_coefficients_fft_test(
unsigned rows){
 
  691         printf(
"=== RUN   %s\n", 
"fk20_poly2toeplitz_coefficients_fft: polynomial -> toeplitz_coefficients_fft");
 
  692         memset(fr_tmp_, 0xdeadbeef,512*16*512*
sizeof(
fr_t)); 
 
  694         fk20_poly2toeplitz_coefficients_fft<<<rows, 256>>>(fr_tmp_, 
polynomial);
 
  695         err = cudaDeviceSynchronize();
 
  696         CUDASYNC(
"fk20_poly2toeplitz_coefficients_fft");
 
  720 #define START_INDEX 3 
  732     if (target == NULL || size <= 0 || step <= 0)
 
  736         if (i + step < size){
 
  737             memcpy(tmp, target+i, 
sizeof(
fr_t));
 
  738             memcpy(target+i, target+i+1, 
sizeof(
fr_t));
 
  739             memcpy(target+i+1, tmp, 
sizeof(
fr_t));
 
  754     if (target == NULL || size <= 0 || step <= 0)
 
  758         if (i + step < size) {
 
  759             memcpy(&tmp, target+i, 
sizeof(
g1p_t));
 
  760             memcpy(target+i, target+i+1, 
sizeof(
g1p_t));
 
  761             memcpy(target+i+1, &tmp, 
sizeof(
g1p_t));
 
const size_t g1p_sharedmem
__host__ void fk20_poly2h_fft(g1p_t *h_fft, const fr_t *polynomial, const g1p_t xext_fft[8192], unsigned rows)
polynomial + xext_fft -> h_fft This function is a wrapper for the full FK20 computation,...
__global__ void fk20_poly2hext_fft(g1p_t *hext_fft, const fr_t *polynomial, const g1p_t xext_fft[8192])
polynomial + xext_fft -> hext_fft
#define SET_SHAREDMEM(SZ, FN)
#define CUDASYNC(fmt,...)
__global__ void fk20_hext_fft2h_fft(g1p_t *h_fft, const g1p_t *hext_fft)
hext_fft -> h_fft
const size_t fr_sharedmem
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t hext_fft[512 *512]
void fk20_poly2toeplitz_coefficients_512(unsigned rows)
Test for fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients.
void fk20_poly2h_fft_512(unsigned rows)
Test for fk20_poly2h_fft: polynomial -> h_fft.
__managed__ g1p_t h_fft[512 *512]
int main(int argc, char **argv)
Executes a many-row tests on FK20. Behavior is similar to fk20test.cu but using many GPU blocks,...
void fullTest_512(unsigned rows)
Executes many FK20 computations on a single row, with a check on each step. A computation failure wil...
void fullTestFalseability_512(unsigned rows)
Similar to fullTest, but polynomial is has changes done to it. The function checks for false-positive...
void h2h_fft_512(unsigned rows)
Test for g1p_fft: h -> h_fft".
__managed__ fr_t toeplitz_coefficients[512 *16][512]
void fk20_msmloop_512(unsigned rows)
Test for fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft.
__managed__ fr_t polynomial[512 *4096]
void hext_fft2h_512(unsigned rows)
Test for g1p_ift: hext_fft -> h.
__managed__ fr_t toeplitz_coefficients_fft[512 *16][512]
void varMangle(fr_t *target, size_t size, unsigned step)
swap elements at positions multiple of step. Nondestructive, call a second time to undo the changes
void fk20_poly2hext_fft_512(unsigned rows)
Test for fk20_poly2hext_fft: polynomial -> hext_fft.
__managed__ g1p_t h[512 *512]
void toeplitz_coefficients2toeplitz_coefficients_fft_512(unsigned rows)
Test for fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft.
void hext_fft2h_fft_512(unsigned rows)
Test for hext_fft2h_fft_512: hext_fft -> h_fft.
void h_fft2h_512(unsigned rows)
Test for g1p_ift: h_fft -> h.
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
__global__ void fr_fft_wrapper(fr_t *output, const fr_t *input)
wrapper for fr_fft: FFT for fr_t[512]
__global__ void g1p_fft_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_fft: FFT for arrays of g1p_t with length 512
__global__ void g1p_ift_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_ift: inverse FFT for arrays of g1p_t with length 512
__shared__ g1p_t g1p_tmp[]
G1 point in projective coordinates.
void testinit()
Sets a global variable to true if the STDOUT is a terminal. Needs to be done like so because while a ...
#define NEGCMPCHECK(LENGTH)
#define NEGPRINTPASS(pass)