16 return (a.
x[5] | a.
x[4] | a.
x[3] | a.
x[2] | a.
x[1] | a.
x[0] |
17 a.
y[5] | a.
y[4] | a.
y[3] | a.
y[2] | a.
y[1] | a.
y[0]) == 0;
22 p = { { 0, 0, 0, 0, 0, 0 }, { 1, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0 } };
24 for(
int i=0; i<6; i++) p.
x[i]=a.
x[i];
25 for(
int i=0; i<6; i++) p.
y[i]=a.
y[i];
28 for(
int i=2; i<6; i++) p.
z[i]=0;
34 for(
int argidx=0; argidx<256; argidx++){
41 for(
int j=0; j<6; j++){
51 for(
int i=256; i<512; i++)
62 const dim3 block(256,1,1);
63 const dim3 grid(512,1,1);
64 const size_t sharedmem = 73728;
69 const char inputFile[] =
"testFFT.in";
71 if (testInputs.
nTest == 0){
75 fprintf(stderr,
"<%s> Test inputs read: %d tests.\n", __func__, testInputs.
nTest);
82 const size_t fftsize = 512*
sizeof(
g1p_t);
83 const size_t memsize = grid.x*fftsize;
87 cudaMallocManaged(&in, memsize);
88 cudaMallocManaged(&out, memsize);
91 for (
int i=0; i<grid.x; i++) memcpy(in+i*512,
g1p_input, fftsize);
94 elapsedTime = -clock();
96 g1p_fft_wrapper<<<grid, block, sharedmem>>>(out, in);
98 cudaDeviceSynchronize();
99 elapsedTime += clock();
101 fprintf(stderr,
"Kernel executed in %.5fs\n", elapsedTime * (1.0 / CLOCKS_PER_SEC) );
103 fprintf(stderr,
"Hello, I still don't do error checking, duuude\n");
void unpackffttest(ffttest_t testInputs, int testIDX, g1p_t g1p_input[512])
void g1p_fromG1aHost(g1p_t &p, const g1a_t &a)
__managed__ g1p_t g1p_input[512]
__managed__ g1p_t g1p_expected[512]
bool g1a_iszeroHost(const g1a_t &a)
__managed__ g1p_t g1p_output[512]
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
void freeffttest_t(ffttest_t *fftTest)
ffttest_t parseFFTTest(const char *filename)
uint768_t fftInput[POLYLEN *2]
struct FFTTestCase * testCase
G1 point in affine coordinates.
G1 point in projective coordinates.