FK20 CUDA
fk20_512test.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include <cstring>
6 #include <time.h>
7 #include "fr.cuh"
8 #include "fp.cuh"
9 #include "g1.cuh"
10 #include "fk20.cuh"
11 #include "fk20test.cuh"
12 
13 // Test vector inputs
14 
15 extern __managed__ g1p_t xext_fft[16][512];
16 extern __managed__ fr_t polynomial[512*4096];
17 
18 // Intermediate values
19 
20 extern __managed__ fr_t toeplitz_coefficients[512*16][512];
21 extern __managed__ fr_t toeplitz_coefficients_fft[512*16][512];
22 extern __managed__ g1p_t hext_fft[512*512];
23 extern __managed__ g1p_t h[512*512];
24 
25 // Test vector output
26 
27 extern __managed__ g1p_t h_fft[512*512];
28 
29 // Workspace
30 
31 static __managed__ uint8_t cmp[512*16*512];
32 static __managed__ fr_t fr_tmp_[512*16*512];
33 static __managed__ g1p_t g1p_tmp[512*512];
34 
35 #define PatternOnWorkspaceMemory
36 #ifdef PatternOnWorkspaceMemory
37  #define PTRN_G1PTMP memset(g1p_tmp, 0x88, 512*512*sizeof(g1p_t));
38  #define PTRN_FRTMP memset(fr_tmp_, 0x88, 512*16*512*sizeof(fr_t));
39 #else
40  #define PTRN_G1PTMP
41  #define PTRN_FRTMP
42 #endif
43 
44 // 512-row tests
45 
47 void h2h_fft_512(unsigned rows);
48 void h_fft2h_512(unsigned rows);
49 void hext_fft2h_512(unsigned rows);
50 void hext_fft2h_fft_512(unsigned rows);
51 
52 void fk20_poly2toeplitz_coefficients_512(unsigned rows);
53 void fk20_poly2hext_fft_512(unsigned rows);
54 void fk20_poly2h_fft_512(unsigned rows);
55 void fk20_msmloop_512(unsigned rows);
56 //void fk20_poly2toeplitz_coefficients_fft_test(unsigned rows);
57 void fullTest_512(unsigned rows);
58 void fullTestFalseability_512(unsigned rows);
59 
60 // Useful for the Falsifiability tests
61 void varMangle(fr_t *target, size_t size, unsigned step);
62 void varMangle(g1p_t *target, size_t size, unsigned step);
63 
64 /******************************************************************************/
65 
75 int main(int argc, char **argv) {
76  testinit(); // setup functions here
77 
78  unsigned rows = 2;
79 
80  if (argc > 1)
81  rows = atoi(argv[1]);
82 
83  if (rows < 1)
84  rows = 1;
85 
86  if (rows > 512)
87  rows = 512;
88 
89  printf("=== RUN test with %d rows\n\n", rows);
90 
91  // FFT tests
93  h2h_fft_512(rows);
94  h_fft2h_512(rows);
95  hext_fft2h_512(rows);
96  // hext_fft2h_fft_512(rows); //Deprecated function
97 
98  // Polynomial tests
101 
102  // MSM test
103  fk20_msmloop_512(rows);
104 
105  // Full FK20 tests
106  fk20_poly2h_fft_512(rows);
107  fullTest_512(rows);
109  //fk20_poly2toeplitz_coefficients_fft_test(rows); //Deprecated function
110 
111  return 0;
112 }
113 
124 /******************************************************************************/
125 
133 void fullTest_512(unsigned rows){
134  cudaError_t err;
135  bool pass = true;
136  CLOCKINIT;
137 
138  // Setup
139 
143 
144  // polynomial -> tc
145 
146  printf("\n>>>>Full integration test\n"); fflush(stdout);
147  printf("polynomial -> tc\n"); fflush(stdout);
148 
149  CLOCKSTART;
150  fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr_tmp_, polynomial);
151  CUDASYNC("fk20_poly2toeplitz_coefficients");
152  CLOCKEND;
153 
154  clearRes512;
155  fr_eq_wrapper<<<256, 32>>>(cmp, 16*512, fr_tmp_, (fr_t *)toeplitz_coefficients);
156  CUDASYNC("fr_eq_wrapper");
157  for (int i=0; i<16*512; i++)
158  if (cmp[i] != 1) {
159  printf("poly2tc error %04x\n", i);
160  pass = false;
161  }
162  PRINTPASS(pass);
163 
164  // tc -> tc_fft
165 
166  printf("tc -> tc_fft\n"); fflush(stdout);
167 
168  CLOCKSTART;
169  fr_fft_wrapper<<<rows*16, 256, fr_sharedmem>>>(fr_tmp_, fr_tmp_); // 16 per row
170  CUDASYNC("fr_fft_wrapper");
171  CLOCKEND;
172 
173  clearRes512;
174  fr_eq_wrapper<<<256, 32>>>(cmp, rows*16*512, fr_tmp_, (fr_t *)toeplitz_coefficients_fft);
175  CUDASYNC("fr_eq_wrapper");
176  CMPCHECK(rows*16*512);
177  PRINTPASS(pass);
178 
179  // tc_fft -> hext_fft
180 
181  printf("tc_fft -> hext_fft\n"); fflush(stdout);
182 
183  CLOCKSTART;
184  fk20_msm<<<rows, 256>>>(g1p_tmp, fr_tmp_, (g1p_t *)xext_fft);
185  CUDASYNC("fk20_msm");
186  CLOCKEND;
187 
188  clearRes512;
189  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, (g1p_t *)hext_fft);
190  CUDASYNC("g1p_eq_wrapper");
191  CMPCHECK(rows*512);
192  PRINTPASS(pass);
193 
194  // hext_fft -> hext -> h
195 
196  printf("hext_fft -> hext -> h\n"); fflush(stdout);
197 
198  CLOCKSTART;
199  g1p_ift_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, g1p_tmp);
200  CUDASYNC("g1p_ift_wrapper");
201  fk20_hext2h<<<rows, 256>>>(g1p_tmp);
202  CLOCKEND;
203  CUDASYNC("fk20_hext2h");
204 
205  clearRes512;
206  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, (g1p_t *)h);
207  CUDASYNC("g1p_eq_wrapper");
208  CMPCHECK(rows*512);
209  PRINTPASS(pass);
210 
211  // h -> h_fft
212 
213  printf("h -> h_fft\n"); fflush(stdout);
214 
215  CLOCKSTART;
216  g1p_fft_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, g1p_tmp);
217  CUDASYNC("g1p_fft_wrapper");
218  CLOCKEND;
219 
220  clearRes512;
221  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, h_fft);
222  CUDASYNC("g1p_eq_wrapper");
223  CMPCHECK(rows*512);
224  PRINTPASS(pass);
225 }
226 
235 void fullTestFalseability_512(unsigned rows){
236  cudaError_t err;
237  bool pass = true;
238  CLOCKINIT;
239 
240  // Setup
241 
245 
246  // polynomial -> tc
247 
248  varMangle(polynomial, 512*4096, 64);
249 
250  printf("\n>>>>Full integration test - Falsifiability\n"); fflush(stdout);
251  printf("polynomial -> tc\n"); fflush(stdout);
252 
253  CLOCKSTART;
254  fk20_poly2toeplitz_coefficients<<<rows, 256, fr_sharedmem>>>(fr_tmp_, polynomial);
255  CUDASYNC("fk20_poly2toeplitz_coefficients");
256  CLOCKEND;
257 
258  clearRes512;
259  fr_eq_wrapper<<<256, 32>>>(cmp, 16*512, fr_tmp_, (fr_t *)toeplitz_coefficients);
260  CUDASYNC("fr_eq_wrapper");
261  NEGCMPCHECK(16*512);
262  NEGPRINTPASS(pass);
263 
264  // tc -> tc_fft
265 
266  printf("tc -> tc_fft\n"); fflush(stdout);
267 
268  CLOCKSTART;
269  fr_fft_wrapper<<<rows*16, 256, fr_sharedmem>>>(fr_tmp_, fr_tmp_); // 16 per row
270  CUDASYNC("fr_fft_wrapper");
271  CLOCKEND;
272 
273  clearRes512;
274  fr_eq_wrapper<<<256, 32>>>(cmp, rows*16*512, fr_tmp_, (fr_t *)toeplitz_coefficients_fft);
275  CUDASYNC("fr_eq_wrapper");
276  NEGCMPCHECK(rows*16*512);
277  NEGPRINTPASS(pass);
278 
279  // tc_fft -> hext_fft
280 
281  printf("tc_fft -> hext_fft\n"); fflush(stdout);
282 
283  CLOCKSTART;
284  fk20_msm<<<rows, 256>>>(g1p_tmp, fr_tmp_, (g1p_t *)xext_fft);
285  CUDASYNC("fk20_msm");
286  CLOCKEND;
287 
288  clearRes512;
289  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, (g1p_t *)hext_fft);
290  CUDASYNC("g1p_eq_wrapper");
291  NEGCMPCHECK(rows*512);
292  NEGPRINTPASS(pass);
293 
294  // hext_fft -> hext -> h
295 
296  printf("hext_fft -> hext -> h\n"); fflush(stdout);
297 
298  CLOCKSTART;
299  g1p_ift_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, g1p_tmp);
300  CUDASYNC("g1p_ift_wrapper");
301  fk20_hext2h<<<rows, 256>>>(g1p_tmp);
302  CLOCKEND;
303  CUDASYNC("fk20_hext2h");
304 
305  clearRes512;
306  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, (g1p_t *)h);
307  CUDASYNC("g1p_eq_wrapper");
308  NEGCMPCHECK(rows*512);
309  NEGPRINTPASS(pass);
310 
311  // h -> h_fft
312 
313  printf("h -> h_fft\n"); fflush(stdout);
314 
315  CLOCKSTART;
316  g1p_fft_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, g1p_tmp);
317  CUDASYNC("g1p_fft_wrapper");
318  CLOCKEND;
319 
320  clearRes512;
321  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, h_fft);
322  CUDASYNC("g1p_eq_wrapper");
323  NEGCMPCHECK(rows*512);
324  NEGPRINTPASS(pass);
325 }
326 
327 /*******************************************************************************
328 
329 The testing functions follow an common template, described in ./doc/fk20test.md
330 
331 *******************************************************************************/
332 
339  PTRN_FRTMP;
340  cudaError_t err;
341  bool pass = true;
342  CLOCKINIT;
343 
344  printf("=== RUN %s\n", "fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft");
345  for(int testIDX=0; testIDX<=1; testIDX++){
346 
347  CLOCKSTART;
348  fr_fft_wrapper<<<rows*16, 256, fr_sharedmem>>>(fr_tmp_, (fr_t *)toeplitz_coefficients);
349  CUDASYNC("fr_fft_wrapper");
350  CLOCKEND;
351 
352  clearRes;
353  fr_eq_wrapper<<<256, 32>>>(cmp, rows*16*512, fr_tmp_, (fr_t *)toeplitz_coefficients_fft);
354  CUDASYNC("fr_eq_wrapper");
355 
356  // Check FFT result
357 
358  if (testIDX == 0){
359  CMPCHECK(rows*16*512)
360  PRINTPASS(pass);
361  }
362  else{
363  NEGCMPCHECK(rows*16*512);
364  NEGPRINTPASS(pass);
365  }
366 
367  varMangle((fr_t*)toeplitz_coefficients_fft, 8192*512, 512);
368  }
369 }
370 
376 void h2h_fft_512(unsigned rows){
377  PTRN_G1PTMP;
378  cudaError_t err;
379  bool pass = true;
380  CLOCKINIT;
381 
383 
384  printf("=== RUN %s\n", "g1p_fft: h -> h_fft");
385  for(int testIDX=0; testIDX<=1; testIDX++){
386 
387  CLOCKSTART;
388  g1p_fft_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, h);
389  CUDASYNC("g1p_fft_wrapper");
390  CLOCKEND;
391 
392  clearRes;
393  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, h_fft);
394  CUDASYNC("g1p_eq_wrapper");
395 
396  // Check FFT result
397 
398  if (testIDX == 0){
399  CMPCHECK(rows*512)
400  PRINTPASS(pass);
401  }
402  else{
403  NEGCMPCHECK(rows*512);
404  NEGPRINTPASS(pass);
405  }
406 
407  varMangle(h, 512*512, 128);
408  }
409 }
410 
416 void h_fft2h_512(unsigned rows){
417  PTRN_G1PTMP;
418  cudaError_t err;
419  bool pass = true;
420  CLOCKINIT;
421 
423 
424  printf("=== RUN %s\n", "g1p_ift: h_fft -> h");
425 
426  for(int testIDX=0; testIDX<=1; testIDX++){
427 
428  CLOCKSTART;
429  g1p_ift_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, h_fft);
430  CUDASYNC("g1p_ift_wrapper");
431  CLOCKEND;
432 
433  clearRes;
434  g1p_eq_wrapper<<<16, 32>>>(cmp, 512*512, g1p_tmp, h);
435  CUDASYNC("g1p_eq_wrapper");
436 
437  // Check IFT result
438 
439  if (testIDX == 0){
440  CMPCHECK(rows*512)
441  PRINTPASS(pass);
442  }
443  else{
444  NEGCMPCHECK(rows*512);
445  NEGPRINTPASS(pass);
446  }
447 
448  varMangle(h_fft, 512*512, 128);
449  }
450 }
451 
457 void hext_fft2h_512(unsigned rows){
458  PTRN_G1PTMP;
459  cudaError_t err;
460  bool pass = true;
461  CLOCKINIT;
462 
464 
465  printf("=== RUN %s\n", "g1p_ift: hext_fft -> h");
466  for(int testIDX=0; testIDX<=1; testIDX++){
467 
468  CLOCKSTART;
469  g1p_ift_wrapper<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, hext_fft);
470  CUDASYNC("g1p_ift_wrapper");
471  fk20_hext2h<<<rows, 256>>>(g1p_tmp);
472  CUDASYNC("fk20_hext2h");
473  CLOCKEND;
474 
475  clearRes;
476  g1p_eq_wrapper<<<8, 32>>>(cmp, rows*512, g1p_tmp, h);
477  CUDASYNC("g1p_eq_wrapper");
478 
479  if (testIDX == 0){
480  CMPCHECK(rows*512)
481  PRINTPASS(pass);
482  }
483  else{
484  NEGCMPCHECK(rows*512);
485  NEGPRINTPASS(pass);
486  }
487 
488  varMangle(hext_fft, 512*512, 128);
489  }
490 }
491 
498  PTRN_FRTMP;
499  cudaError_t err;
500  bool pass = true;
501  CLOCKINIT;
502 
503  printf("=== RUN %s\n", "fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients");
504  for(int testIDX=0; testIDX<=1; testIDX++){
505 
506  CLOCKSTART;
507  fk20_poly2toeplitz_coefficients<<<rows, 256 >>>(fr_tmp_, polynomial);
508  //IMPORTANT: This function does not need shared memory. Making the kernel call with a dynamic shared memory allocation
509  //is known to cause some suble bugs, that not always show during normal execution.
510  CUDASYNC("fk20_poly2toeplitz_coefficients");
511  CLOCKEND;
512 
513  clearRes;
514  fr_eq_wrapper<<<1, 32>>>(cmp, rows*16*512, fr_tmp_, (fr_t *)toeplitz_coefficients);
515  CUDASYNC("fr_eq_wrapper");
516 
517  if (testIDX == 0){
518  CMPCHECK(rows*16*512)
519  PRINTPASS(pass);
520  }
521  else{
522  NEGCMPCHECK(rows*16*512);
523  NEGPRINTPASS(pass);
524  }
525 
526  varMangle(polynomial, 512*4096, 8);
527  }
528 }
529 
535 void fk20_poly2hext_fft_512(unsigned rows){
536  PTRN_G1PTMP;
537  cudaError_t err;
538  bool pass = true;
539  CLOCKINIT;
540 
541  pass = true;
542 
544 
545  printf("=== RUN %s\n", "fk20_poly2hext_fft: polynomial -> hext_fft");
546  for(int testIDX=0; testIDX<=1; testIDX++){
547 
548  CLOCKSTART;
549  fk20_poly2hext_fft<<<rows, 256, fr_sharedmem>>>(g1p_tmp, polynomial, (const g1p_t *)xext_fft);
550  CUDASYNC("fk20_poly2hext_fft");
551  CLOCKEND;
552 
553  clearRes;
554  g1p_eq_wrapper<<<1, 32>>>(cmp, rows*512, g1p_tmp, (g1p_t *)hext_fft);
555  CUDASYNC("g1p_eq_wrapper");
556 
557  if (testIDX == 0){
558  CMPCHECK(rows*512)
559  PRINTPASS(pass);
560  }
561  else{
562  NEGCMPCHECK(rows*512);
563  NEGPRINTPASS(pass);
564  }
565 
566  varMangle(( g1p_t *)xext_fft, 16*512, 32);
567  }
568 }
569 
575 void fk20_poly2h_fft_512(unsigned rows){
577  cudaError_t err;
578  bool pass = true;
579  CLOCKINIT;
580 
581  printf("=== RUN %s\n", "fk20_poly2h_fft: polynomial -> h_fft");
582 
583  for(int testIDX=0; testIDX<=1; testIDX++){
584 
585  CLOCKSTART;
586  fk20_poly2h_fft(g1p_tmp, polynomial, (const g1p_t *)xext_fft, rows);
587  CUDASYNC("fk20_poly2h_fft");
588  CLOCKEND;
589 
590  clearRes;
591  g1p_eq_wrapper<<<1, 32>>>(cmp, rows*512, g1p_tmp, (g1p_t *)h_fft);
592  CUDASYNC("g1p_eq_wrapper");
593 
594  if (testIDX == 0){
595  CMPCHECK(rows*512)
596  PRINTPASS(pass);
597  }
598  else{
599  NEGCMPCHECK(rows*512);
600  NEGPRINTPASS(pass);
601  }
602 
603  varMangle(( g1p_t *)xext_fft, 16*512, 32);
604  }
605 }
606 
612 void hext_fft2h_fft_512(unsigned rows){
613  PTRN_G1PTMP;
614  cudaError_t err;
615  bool pass = true;
616  CLOCKINIT;
617 
619 
620  printf("=== RUN %s\n", "hext_fft2h_fft_512: hext_fft -> h_fft");
621  for(int testIDX=0; testIDX<=1; testIDX++){
622 
623  CLOCKSTART;
624  fk20_hext_fft2h_fft<<<rows, 256, g1p_sharedmem>>>(g1p_tmp, hext_fft);
625  CUDASYNC("fk20_hext_fft2h_fft");
626  CLOCKEND;
627 
628  clearRes;
629  g1p_eq_wrapper<<<8, 32>>>(cmp, rows*512, g1p_tmp, h);
630  CUDASYNC("g1p_eq_wrapper");
631 
632  if (testIDX == 0){
633  CMPCHECK(rows*512)
634  PRINTPASS(pass);
635  }
636  else{
637  NEGCMPCHECK(rows*512);
638  NEGPRINTPASS(pass);
639  }
640 
641  varMangle(hext_fft, 512*512, 32);
642  }
643 }
644 
650 void fk20_msmloop_512(unsigned rows){
651  CLOCKINIT;
652  cudaError_t err;
653  bool pass = true;
654 
655  printf("=== RUN %s\n", "fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft");
656  for(int testIDX=0; testIDX<=1; testIDX++){
657 
658  CLOCKSTART;
659  fk20_msm<<<rows, 256>>>(g1p_tmp, (const fr_t*)toeplitz_coefficients_fft, (const g1p_t*)xext_fft);
660  CUDASYNC("fk20_msm");
661  CLOCKEND;
662 
663  clearRes;
664  g1p_eq_wrapper<<<16, 32>>>(cmp, rows*512, g1p_tmp, (g1p_t *)hext_fft);
665  CUDASYNC("g1p_eq_wrapper");
666 
667  // Check result
668 
669  if (testIDX == 0){
670  CMPCHECK(rows*512)
671  PRINTPASS(pass);
672  }
673  else{
674  NEGCMPCHECK(rows*512);
675  NEGPRINTPASS(pass);
676  }
677 
678  varMangle((fr_t*)toeplitz_coefficients_fft, 8192*512, 512);
679  }
680 }
681 
682 //Deprecated funtion
683 #if 0
684  void fk20_poly2toeplitz_coefficients_fft_test(unsigned rows){
685  // Test for deprecated function.
686  PTRN_FRTMP;
687  CLOCKINIT;
688  cudaError_t err;
689  bool pass = true;
690 
691  printf("=== RUN %s\n", "fk20_poly2toeplitz_coefficients_fft: polynomial -> toeplitz_coefficients_fft");
692  memset(fr_tmp_, 0xdeadbeef,512*16*512*sizeof(fr_t)); //pattern on tmp dest.
693  CLOCKSTART;
694  fk20_poly2toeplitz_coefficients_fft<<<rows, 256>>>(fr_tmp_, polynomial);
695  err = cudaDeviceSynchronize();
696  CUDASYNC("fk20_poly2toeplitz_coefficients_fft");
697  CLOCKEND;
698  clearRes;
699  fr_eq_wrapper<<<16, 256>>>(cmp, rows*16*512, fr_tmp_, (fr_t *)toeplitz_coefficients_fft);
700  CUDASYNC("fr_eq_wrapper");
701  // Check result
702 
703  CMPCHECK(rows*16*512);
704  PRINTPASS(pass);
705  }
706 #endif
707 
709 // //
710 // Useful functions for the falsifiability tests //
711 // //
712 // Useful for the Falsifiability tests //
713 // If you are using a variable where i*step == i*step+1, you can end up with //
714 // a false(false positive). //
715 // A staggered start helps to mitigate it, but it can happen with a very //
716 // small probability. //
717 // //
719 
720 #define START_INDEX 3
721 
730 void varMangle(fr_t *target, size_t size, unsigned step){
731  fr_t tmp;
732  if (target == NULL || size <= 0 || step <= 0)
733  return;
734 
735  for (int i = START_INDEX; i < size; i += step) {
736  if (i + step < size){
737  memcpy(tmp, target+i, sizeof(fr_t));
738  memcpy(target+i, target+i+1, sizeof(fr_t));
739  memcpy(target+i+1, tmp, sizeof(fr_t));
740  }
741  }
742 }
743 
752 void varMangle(g1p_t *target, size_t size, unsigned step){
753  g1p_t tmp;
754  if (target == NULL || size <= 0 || step <= 0)
755  return;
756 
757  for (int i = START_INDEX; i < size; i += step) {
758  if (i + step < size) {
759  memcpy(&tmp, target+i, sizeof(g1p_t));
760  memcpy(target+i, target+i+1, sizeof(g1p_t));
761  memcpy(target+i+1, &tmp, sizeof(g1p_t));
762  }
763  }
764 }
765 
766 #undef START_INDEX
767 // vim: ts=4 et sw=4 si
const size_t g1p_sharedmem
Definition: fk20.cuh:14
__host__ void fk20_poly2h_fft(g1p_t *h_fft, const fr_t *polynomial, const g1p_t xext_fft[8192], unsigned rows)
polynomial + xext_fft -> h_fft This function is a wrapper for the full FK20 computation,...
__global__ void fk20_poly2hext_fft(g1p_t *hext_fft, const fr_t *polynomial, const g1p_t xext_fft[8192])
polynomial + xext_fft -> hext_fft
#define SET_SHAREDMEM(SZ, FN)
Definition: fk20.cuh:48
#define CUDASYNC(fmt,...)
Definition: fk20.cuh:39
__global__ void fk20_hext_fft2h_fft(g1p_t *h_fft, const g1p_t *hext_fft)
hext_fft -> h_fft
const size_t fr_sharedmem
Definition: fk20.cuh:15
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t hext_fft[512 *512]
void fk20_poly2toeplitz_coefficients_512(unsigned rows)
Test for fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients.
void fk20_poly2h_fft_512(unsigned rows)
Test for fk20_poly2h_fft: polynomial -> h_fft.
__managed__ g1p_t h_fft[512 *512]
int main(int argc, char **argv)
Executes a many-row tests on FK20. Behavior is similar to fk20test.cu but using many GPU blocks,...
Definition: fk20_512test.cu:75
#define PTRN_FRTMP
Definition: fk20_512test.cu:38
void fullTest_512(unsigned rows)
Executes many FK20 computations on a single row, with a check on each step. A computation failure wil...
void fullTestFalseability_512(unsigned rows)
Similar to fullTest, but polynomial is has changes done to it. The function checks for false-positive...
void h2h_fft_512(unsigned rows)
Test for g1p_fft: h -> h_fft".
__managed__ fr_t toeplitz_coefficients[512 *16][512]
void fk20_msmloop_512(unsigned rows)
Test for fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft.
__managed__ fr_t polynomial[512 *4096]
void hext_fft2h_512(unsigned rows)
Test for g1p_ift: hext_fft -> h.
__managed__ fr_t toeplitz_coefficients_fft[512 *16][512]
void varMangle(fr_t *target, size_t size, unsigned step)
swap elements at positions multiple of step. Nondestructive, call a second time to undo the changes
void fk20_poly2hext_fft_512(unsigned rows)
Test for fk20_poly2hext_fft: polynomial -> hext_fft.
__managed__ g1p_t h[512 *512]
#define START_INDEX
void toeplitz_coefficients2toeplitz_coefficients_fft_512(unsigned rows)
Test for fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft.
void hext_fft2h_fft_512(unsigned rows)
Test for hext_fft2h_fft_512: hext_fft -> h_fft.
void h_fft2h_512(unsigned rows)
Test for g1p_ift: h_fft -> h.
#define PTRN_G1PTMP
Definition: fk20_512test.cu:37
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
Definition: fr.cuh:24
__global__ void fr_fft_wrapper(fr_t *output, const fr_t *input)
wrapper for fr_fft: FFT for fr_t[512]
Definition: fr_fft.cu:316
__global__ void g1p_fft_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_fft: FFT for arrays of g1p_t with length 512
Definition: g1p_fft.cu:336
__global__ void g1p_ift_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_ift: inverse FFT for arrays of g1p_t with length 512
Definition: g1p_fft.cu:349
__shared__ g1p_t g1p_tmp[]
G1 point in projective coordinates.
Definition: g1.cuh:27
void testinit()
Sets a global variable to true if the STDOUT is a terminal. Needs to be done like so because while a ...
Definition: test.cu:18
#define CLOCKINIT
Definition: test.h:98
#define clearRes512
Definition: test.h:92
#define NEGCMPCHECK(LENGTH)
Definition: test.h:116
#define clearRes
Definition: test.h:87
#define PRINTPASS(pass)
Definition: test.h:25
#define CLOCKEND
Definition: test.h:100
#define CLOCKSTART
Definition: test.h:99
#define NEGPRINTPASS(pass)
Definition: test.h:32
#define CMPCHECK(LENGTH)
Definition: test.h:106