FK20-CUDAdocs/fft_test_8cu_source.html

 // Testing for the fk20, loosely based on the fk20test_kat.cu

 #include <stdio.h>

 #include <stdio.h>

 #include <cuda.h>

 #include <time.h>

 #include "g1.cuh"

 #include "fk20.cuh"


 extern "C"{

     #include "parseFFTTest.h"

 }


 // Managed-memory arrays of 512 projective G1 points each.
 // g1p_input is filled by unpackffttest() and then replicated into the kernel
 // input buffer by FFTTest(). g1p_output and g1p_expected are declared here
 // but are not referenced by any function visible in this file.
 __managed__ g1p_t g1p_input[512], g1p_output[512], g1p_expected[512];


 // Host-side test for an all-zero affine point.
 // Returns true only when every one of the six limbs of a.x and of a.y is zero;
 // g1p_fromG1aHost() uses this to detect the point at infinity.
 bool g1a_iszeroHost(const g1a_t &a) {
     for (int limb = 0; limb < 6; limb++) {
         // A single non-zero limb in either coordinate is enough to reject.
         if (a.x[limb] != 0 || a.y[limb] != 0)
             return false;
     }
     return true;
 }


 // Host-side conversion of an affine G1 point into projective coordinates.
 //
 //   p  [out] receives the projective representation
 //   a  [in]  affine point; all-zero coordinates denote the point at infinity
 //
 // A regular point (x, y) becomes (x, y, 1). The point at infinity becomes
 // (0, 1, 0), the same encoding g1p_inf() produces.
 void g1p_fromG1aHost(g1p_t &p, const g1a_t &a) {
     if (g1a_iszeroHost(a)) {
         p = { { 0, 0, 0, 0, 0, 0 }, { 1, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0 } };
         // Must stop here: falling through would overwrite the infinity
         // encoding with (0, 0, 1), which is not a valid point.
         return;
     }

     for (int i = 0; i < 6; i++) {
         p.x[i] = a.x[i];
         p.y[i] = a.y[i];
     }

     // z = 1, written limb by limb (host stand-in for fp_one(p.z)).
     // Every upper limb is cleared, including z[1], so the result does not
     // depend on whatever p held before the call.
     p.z[0] = 1;
     for (int i = 1; i < 6; i++) p.z[i] = 0;
 }


 // Unpack one parsed FFT test case into an array of 512 projective G1 points.
 //
 //   testInputs  parsed test file (see parseFFTTest)
 //   testIDX     index of the test case to unpack; not range-checked here
 //   g1p_input   [out] 512 points: 256 converted inputs followed by 256 infinities
 void unpackffttest(ffttest_t testInputs, int testIDX, g1p_t g1p_input[512]){
     /* Because of a limitation in the API of BLST, the test-case generator only has access to
      * the affine representation of G1 elements -- where each element is represented as two Fp
      * elements. The g1p_fft uses projective representation, where an extra Fp element is used.
      * Note that FFTTestCase.fftInputp is TODO
      */
     int slot = 0;

     // First half: the 256 FFT input elements, converted from affine to projective.
     while (slot < 256) {
         const auto &packed = testInputs.testCase[testIDX].fftInput[slot];
         g1a_t affine;

         // Words 0..5 hold x, words 6..11 hold y.
         for (int limb = 0; limb < 6; limb++) {
             affine.x[limb] = packed.word[limb];
             affine.y[limb] = packed.word[limb + 6];
         }

         g1p_fromG1aHost(g1p_input[slot], affine);
         slot++;
     }

     // Second half: set to infinity due to the design of the reference Python implementation.
     for (; slot < 512; slot++)
         g1p_inf(g1p_input[slot]);
 }


 // Placeholder for FFT tests generated from randomness.
 // Not implemented: the function currently does nothing.
 void FFTTest_random(){
     // Intentionally empty.
 }


 // Reports a failed CUDA runtime call on stderr.
 // Returns true when `err` is cudaSuccess, false otherwise.
 static bool fftTestCudaOk(cudaError_t err, const char *what) {
     if (err == cudaSuccess)
         return true;
     fprintf(stderr, "<FFTTest> %s failed: %s\n", what, cudaGetErrorString(err));
     return false;
 }

 // Runs g1p_fft_wrapper on test data read from "testFFT.in" and reports timing.
 //
 // Uses tests picked from actual use-cases, extracted from the instrumented
 // Python implementation. The first test case is unpacked once and replicated
 // into one 512-point input per thread block (512 blocks of 256 threads, with
 // 72 KiB of dynamic shared memory per block).
 //
 // Exits the process with status -1 when no test could be read or when any
 // CUDA call or the kernel itself fails. The kernel output is not yet compared
 // against expected values.
 void FFTTest(){
     const dim3 block(256,1,1);
     const dim3 grid(512,1,1);
     const size_t sharedmem = 73728; //72 KiB

     // Read data from testFFT.in using parseFFTTest
     const char inputFile[] = "testFFT.in";
     ffttest_t testInputs = parseFFTTest(inputFile);

     if (testInputs.nTest == 0){
         fprintf(stderr, "<%s> No test inputs could be read from %s\n", __func__, inputFile);
         exit(-1);
     }
     // nTest is unsigned, hence %u.
     fprintf(stderr, "<%s> Test inputs read: %u tests.\n", __func__, testInputs.nTest);

     // Convert testcase into g1p format
     unpackffttest(testInputs, 0, g1p_input);

     // Allocate memory: one 512-point FFT per thread block
     const size_t fftsize = 512*sizeof(g1p_t);
     const size_t memsize = grid.x*fftsize;

     g1p_t *in = NULL, *out = NULL;

     bool ok = fftTestCudaOk(cudaMallocManaged(&in,  memsize), "cudaMallocManaged(in)")
            && fftTestCudaOk(cudaMallocManaged(&out, memsize), "cudaMallocManaged(out)")
            // More than 48 KiB of dynamic shared memory per block must be
            // opted into explicitly, otherwise the launch is rejected.
            && fftTestCudaOk(cudaFuncSetAttribute(g1p_fft_wrapper,
                                                  cudaFuncAttributeMaxDynamicSharedMemorySize,
                                                  (int)sharedmem),
                             "cudaFuncSetAttribute(g1p_fft_wrapper)");

     if (ok) {
         // Replicate the single unpacked input for every block (managed memory,
         // so a plain host memcpy is sufficient).
         for (size_t i=0; i<grid.x; i++) memcpy(in+i*512, g1p_input, fftsize);

         // Run multi-fft; clock() measures host processor time from launch
         // until the device has finished.
         clock_t elapsedTime = -clock();

         g1p_fft_wrapper<<<grid, block, sharedmem>>>(out, in);

         // Launch-configuration errors surface here, execution errors at the sync.
         ok = fftTestCudaOk(cudaGetLastError(), "g1p_fft_wrapper launch");
         ok = fftTestCudaOk(cudaDeviceSynchronize(), "g1p_fft_wrapper execution") && ok;

         elapsedTime += clock();

         if (ok) {
             fprintf(stderr, "Kernel executed in %.5fs\n", elapsedTime * (1.0 / CLOCKS_PER_SEC) );

             // Check for correctness, report errors
             // (comparison of `out` against expected values is not implemented yet)
             fprintf(stderr, "Hello, I still don't do error checking, duuude\n");
         }
     }

     // Deallocate; cudaFree(NULL) is a no-op, so this is safe after a failed allocation.
     cudaFree(in);
     cudaFree(out);
     freeffttest_t(&testInputs);

     if (!ok)
         exit(-1);
 }


 // Program initialisation hook, called once from main().
 // Currently there is nothing to set up.
 void init(){
     // Intentionally empty.
 }


 // Entry point: runs the initialisation hook, prints a debug marker on stdout
 // and executes the FFT test. Always returns 0.
 int main(){
     init();

     // Marker showing that the program started.
     fputs("Debug\n", stdout);

     FFTTest();
     return 0;
 }


 // vim: ts=4 et sw=4 si

init
void init()
Definition: fftTest.cu:113

FFTTest
void FFTTest()
Definition: fftTest.cu:60

unpackffttest
void unpackffttest(ffttest_t testInputs, int testIDX, g1p_t g1p_input[512])
Definition: fftTest.cu:31

g1p_fromG1aHost
void g1p_fromG1aHost(g1p_t &p, const g1a_t &a)
Definition: fftTest.cu:20

g1p_input
__managed__ g1p_t g1p_input[512]
Definition: fftTest.cu:13

FFTTest_random
void FFTTest_random()
Definition: fftTest.cu:55

g1p_expected
__managed__ g1p_t g1p_expected[512]
Definition: fftTest.cu:13

main
int main()
Definition: fftTest.cu:117

g1a_iszeroHost
bool g1a_iszeroHost(const g1a_t &a)
Definition: fftTest.cu:15

g1p_output
__managed__ g1p_t g1p_output[512]
Definition: fftTest.cu:13

fk20.cuh

g1.cuh

g1p_inf
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
Definition: g1p.cu:93

freeffttest_t
void freeffttest_t(ffttest_t *fftTest)
Definition: parseFFTTest.c:9

parseFFTTest
ffttest_t parseFFTTest(const char *filename)
Definition: parseFFTTest.c:136

parseFFTTest.h

FFTTestCase::fftInput
uint768_t fftInput[POLYLEN *2]
Definition: parseFFTTest.h:13

FFTTest
Definition: parseFFTTest.h:17

FFTTest::nTest
unsigned int nTest
Definition: parseFFTTest.h:18

FFTTest::testCase
struct FFTTestCase * testCase
Definition: parseFFTTest.h:21

g1a_t
G1 point in affine coordinates.
Definition: g1.cuh:20

g1a_t::y
fp_t y
Definition: g1.cuh:21

g1a_t::x
fp_t x
Definition: g1.cuh:21

g1p_t
G1 point in projective coordinates.
Definition: g1.cuh:27

g1p_t::z
fp_t z
Definition: g1.cuh:28

g1p_t::x
fp_t x
Definition: g1.cuh:28

g1p_t::y
fp_t y
Definition: g1.cuh:28

uint768::word
uint64_t word[12]
Definition: parseFFTTest.h:5