FK20 CUDA
fftTest.cu
Go to the documentation of this file.
1 // Testing for the FK20 FFT, loosely based on fk20test_kat.cu
2 #include <stdio.h>
3 #include <stdio.h>
4 #include <cuda.h>
5 #include <time.h>
6 #include "g1.cuh"
7 #include "fk20.cuh"
8 
9 extern "C"{
10  #include "parseFFTTest.h"
11 }
12 
13 __managed__ g1p_t g1p_input[512], g1p_output[512], g1p_expected[512];
14 
/**
 * Host-side test for the all-zero affine point.
 *
 * @param a affine G1 point to inspect
 * @return true when all six limbs of a.x and all six limbs of a.y are zero
 */
bool g1a_iszeroHost(const g1a_t &a) {
    // Fold every limb of both coordinates into one accumulator; it stays
    // zero only if each individual limb is zero.
    auto limbBits = a.x[0] | a.y[0];

    for (int limb = 1; limb < 6; limb++) {
        limbBits |= a.x[limb] | a.y[limb];
    }

    return limbBits == 0;
}
19 
/**
 * Host-side conversion of an affine G1 point into projective coordinates.
 *
 * The all-zero affine value is treated as the point at infinity and is mapped
 * to (0, 1, 0). Any other point (x, y) is mapped to (x, y, 1).
 *
 * @param[out] p projective point that receives the result
 * @param[in]  a affine point to convert
 */
void g1p_fromG1aHost(g1p_t &p, const g1a_t &a) {
    if (g1a_iszeroHost(a)) {
        p = { { 0, 0, 0, 0, 0, 0 }, { 1, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0 } };
        // Stop here: falling through would overwrite the point at infinity
        // with (0, 0, 1).
        return;
    }

    for (int i = 0; i < 6; i++) {
        p.x[i] = a.x[i];
        p.y[i] = a.y[i];
    }

    // z = 1 (host-side stand-in for fp_one). Every limb is written, including
    // limb 1, so no part of p.z is left uninitialised.
    p.z[0] = 1;
    for (int i = 1; i < 6; i++) {
        p.z[i] = 0;
    }
}
30 
/**
 * Unpacks one parsed FFT test case into an array of 512 projective G1 points.
 *
 * The first 256 entries are converted from the affine test inputs; the last
 * 256 entries are set to the point at infinity. If testIDX does not name an
 * available test case, an error is reported on stderr and all 512 entries are
 * set to the point at infinity instead of reading out of bounds.
 *
 * @param[in]  testInputs parsed test data (as returned by parseFFTTest)
 * @param[in]  testIDX    index of the test case to unpack
 * @param[out] g1p_input  destination array of 512 projective points
 */
void unpackffttest(ffttest_t testInputs, int testIDX, g1p_t g1p_input[512]){
    // Reject indices outside [0, nTest) before touching testCase[].
    if (testIDX < 0 || (unsigned int)testIDX >= testInputs.nTest) {
        fprintf(stderr, "<%s> Test index %d out of range (%u tests available).\n",
                __func__, testIDX, testInputs.nTest);
        for (int i = 0; i < 512; i++)
            g1p_inf(g1p_input[i]);
        return;
    }

    g1a_t tmp;

    // First, read the 256 FFT input elements.
    for (int argidx = 0; argidx < 256; argidx++) {
        /* Because of a limitation in the BLST API, the test-case generator only
         * has access to the affine representation of G1 elements, where each
         * element is represented as two Fp elements. g1p_fft uses the projective
         * representation, where an extra Fp element is used.
         * Note that FFTTestCase.fftInputp is TODO.
         */

        // Each input holds 12 words: words 0-5 are x, words 6-11 are y.
        for (int j = 0; j < 6; j++) {
            tmp.x[j] = testInputs.testCase[testIDX].fftInput[argidx].word[j];
            tmp.y[j] = testInputs.testCase[testIDX].fftInput[argidx].word[j+6];
        }

        // Convert the affine point to projective form.
        g1p_fromG1aHost(g1p_input[argidx], tmp);
    }

    // The last 256 elements are set to infinity due to the design of the
    // reference Python implementation.
    for (int i = 256; i < 512; i++)
        g1p_inf(g1p_input[i]);
}
54 
56  // Generates tests from randomness
57  return;
58 }
59 
/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param err  status returned by the CUDA call
 * @param what short description of the call, used in the message
 * @return true when err is cudaSuccess, false otherwise
 */
static bool FFTTestCudaOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "<FFTTest> %s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

/**
 * Runs g1p_fft_wrapper on test data read from "testFFT.in" and reports timing.
 *
 * Uses tests picked from actual use-cases, extracted from the instrumented
 * Python implementation. The first test case is replicated once per block.
 *
 * Launch configuration: 512 blocks of 256 threads, 72 KiB of dynamic shared
 * memory per block. Exits the process with status -1 if no test could be read.
 * CUDA failures are reported on stderr; resources are released in all cases.
 * The FFT output is not yet compared against expected values.
 */
void FFTTest(){
    const dim3 block(256,1,1);
    const dim3 grid(512,1,1);
    const size_t sharedmem = 73728; //72 KiB

    clock_t elapsedTime;

    // Read data from testFFT.in using parseFFTTest
    const char inputFile[] = "testFFT.in";
    ffttest_t testInputs = parseFFTTest(inputFile);
    if (testInputs.nTest == 0){
        fprintf(stderr, "<%s> No tests could be read from %s.\n", __func__, inputFile);
        exit(-1);
    }
    else{
        fprintf(stderr, "<%s> Test inputs read: %d tests.\n", __func__, testInputs.nTest);
    }

    // Convert testcase into g1p format
    unpackffttest(testInputs, 0, g1p_input);

    // Allocate memory: one 512-point FFT per block
    const size_t fftsize = 512*sizeof(g1p_t);
    const size_t memsize = grid.x*fftsize;

    g1p_t *in = NULL, *out = NULL;

    bool ok = FFTTestCudaOk(cudaMallocManaged(&in, memsize), "cudaMallocManaged(in)")
           && FFTTestCudaOk(cudaMallocManaged(&out, memsize), "cudaMallocManaged(out)")
           // More than 48 KiB of dynamic shared memory per block needs an
           // explicit opt-in, otherwise the launch is rejected.
           && FFTTestCudaOk(cudaFuncSetAttribute(g1p_fft_wrapper,
                                                 cudaFuncAttributeMaxDynamicSharedMemorySize,
                                                 (int)sharedmem),
                            "cudaFuncSetAttribute(g1p_fft_wrapper)");

    if (ok) {
        // Copy the same input into every block's slot (managed memory, so a
        // host-side memcpy is sufficient).
        for (size_t i = 0; i < grid.x; i++) memcpy(in + i*512, g1p_input, fftsize);

        // Run multi-fft
        elapsedTime = -clock();

        g1p_fft_wrapper<<<grid, block, sharedmem>>>(out, in);
        const cudaError_t launchStatus = cudaGetLastError();    // launch-configuration errors
        const cudaError_t syncStatus = cudaDeviceSynchronize(); // errors raised during execution

        elapsedTime += clock();

        ok = FFTTestCudaOk(launchStatus, "g1p_fft_wrapper launch")
          && FFTTestCudaOk(syncStatus, "cudaDeviceSynchronize");

        if (ok) {
            fprintf(stderr, "Kernel executed in %.5fs\n", elapsedTime * (1.0 / CLOCKS_PER_SEC) );
            // Check for correctness, report errors
            fprintf(stderr, "Hello, I still don't do error checking, duuude\n");
        }
    }

    // Deallocate (cudaFree accepts a null pointer, so this is safe after a
    // failed allocation)
    cudaFree(in);
    cudaFree(out);
    freeffttest_t(&testInputs);

}
111 
112 
/**
 * Start-up hook called once from main() before the tests run.
 * Currently does nothing.
 */
void init() {
    // Intentionally empty.
}
116 
/**
 * Program entry point: runs init(), prints a debug marker on stdout and then
 * executes the FFT test.
 *
 * @return always 0
 */
int main() {
    init();

    printf("Debug\n");

    FFTTest();
    return 0;
}
124 
125 // vim: ts=4 et sw=4 si
void init()
Definition: fftTest.cu:113
void FFTTest()
Definition: fftTest.cu:60
void unpackffttest(ffttest_t testInputs, int testIDX, g1p_t g1p_input[512])
Definition: fftTest.cu:31
void g1p_fromG1aHost(g1p_t &p, const g1a_t &a)
Definition: fftTest.cu:20
__managed__ g1p_t g1p_input[512]
Definition: fftTest.cu:13
void FFTTest_random()
Definition: fftTest.cu:55
__managed__ g1p_t g1p_expected[512]
Definition: fftTest.cu:13
int main()
Definition: fftTest.cu:117
bool g1a_iszeroHost(const g1a_t &a)
Definition: fftTest.cu:15
__managed__ g1p_t g1p_output[512]
Definition: fftTest.cu:13
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
Definition: g1p.cu:93
void freeffttest_t(ffttest_t *fftTest)
Definition: parseFFTTest.c:9
ffttest_t parseFFTTest(const char *filename)
Definition: parseFFTTest.c:136
uint768_t fftInput[POLYLEN *2]
Definition: parseFFTTest.h:13
unsigned int nTest
Definition: parseFFTTest.h:18
struct FFTTestCase * testCase
Definition: parseFFTTest.h:21
G1 point in affine coordinates.
Definition: g1.cuh:20
fp_t y
Definition: g1.cuh:21
fp_t x
Definition: g1.cuh:21
G1 point in projective coordinates.
Definition: g1.cuh:27
fp_t z
Definition: g1.cuh:28
fp_t x
Definition: g1.cuh:28
fp_t y
Definition: g1.cuh:28
uint64_t word[12]
Definition: parseFFTTest.h:5