FK20-CUDAdocs/fk20_8cu_source.html

 // bls12_381: Arithmetic for BLS12-381

 // Copyright 2022-2023 Dag Arne Osvik

 // Copyright 2022-2023 Luan Cardoso dos Santos


 #include <stdio.h>


 #include "fr.cuh"

 #include "g1.cuh"

 #include "fk20.cuh"


 // Workspace in shared memory


 //extern __shared__ fr_t fr_tmp[];    // 16 KiB shared memory

 //extern __shared__ g1p_t g1p_tmp[];  // 72 KiB shared memory


 // fk20_setup2xext_fft(): setup -> xext_fft
 //
 // Each of the 16 thread blocks builds one 512-element row: every l-th setup
 // point in descending index order, padded with the point at infinity, and then
 // transformed by g1p_fft().
 //
 // parameters:
 // - out xext_fft  array with 16*512 elements; also used as the staging buffer
 //                 for the untransformed rows
 // - in  setup     array of setup points; indices 0 .. 4079 are read
 //
 // Launch configuration: gridDim = (16,1,1), blockDim = (256,1,1). With any
 // other configuration the kernel returns without writing anything.
 // NOTE(review): g1p_fft() presumably needs dynamic shared memory (see the
 // commented-out workspace declarations near the top of this file) - confirm
 // that the launch passes the required size.

 __global__ void fk20_setup2xext_fft(g1p_t *xext_fft, const g1p_t *setup) {

     // The index arithmetic below is hard-wired to this geometry. The test is
     // uniform across the block, so returning here cannot strand threads at the
     // barrier further down.
     if (gridDim.x != 16 || gridDim.y != 1 || gridDim.z != 1) return;
     if (blockDim.x != 256 || blockDim.y != 1 || blockDim.z != 1) return;

     const int n = 4096, l = 16, k = 256;

     // Signed copies: `input` below is meant to go negative for the last thread,
     // so the subtraction must not be carried out in unsigned arithmetic.
     const int tid = (int)threadIdx.x; // Thread number
     const int bid = (int)blockIdx.x;  // Block number

     g1p_t *xext = xext_fft;

     // Row bid is setup[n-l-1-bid], setup[n-2l-1-bid], ... : k-1 = 255 points.
     // Thread 255 therefore gets a negative index and writes the point at
     // infinity. Starting from n-1-bid instead never yields a negative index
     // (the padding branch becomes dead) and shifts every row by one stride.
     const int input  = n - l - 1 - bid - l * tid;
     const int output = 2*k * bid + tid;

     if (input >= 0)
         g1p_cpy(xext[output], setup[input]);
     else
         g1p_inf(xext[output]);

     // Part 1: extend with point at infinity, then perform G1 FFT

     __syncthreads();

     // Upper half of the row (entries k .. 2k-1) is all point-at-infinity.
     g1p_inf(xext[2*k*bid + k + tid]);

     g1p_fft(xext_fft, xext);  // 16 FFT-512
 }


 // The kernels below are thin wrappers that make the device-side G1 FFT routines launchable as kernels.


 // fk20_hext_fft2hext(): hext_fft -> hext
 //
 // Kernel wrapper around the device function g1p_ift() (inverse FFT of size 512
 // over G1 in projective coordinates).
 //
 // parameters:
 // - out hext      destination passed to g1p_ift()
 // - in  hext_fft  source passed to g1p_ift()
 //
 // NOTE(review): array sizes and launch geometry are dictated by g1p_ift(),
 // which is not shown here - presumably 512 elements per block, as documented
 // for fk20_h2h_fft(); confirm against g1p_fft.cu.
 __global__ void fk20_hext_fft2hext(g1p_t *hext, const g1p_t *hext_fft) {

     g1p_ift(hext, hext_fft);

 }


 // fk20_h2h_fft(): h -> h_fft
 //
 // Kernel wrapper around the device function g1p_fft() (FFT of size 512 over G1
 // in projective coordinates).

 // parameters:

 // - in  h      array with 512*gridDim.x elements

 // - out h_fft  array with 512*gridDim.x elements
 //
 // NOTE(review): the required block size and shared-memory size are dictated by
 // g1p_fft(), which is not shown here - confirm against g1p_fft.cu.

 __global__ void fk20_h2h_fft(g1p_t *h_fft, const g1p_t *h) {

     // All work is delegated; this kernel adds no indexing of its own.
     g1p_fft(h_fft, h);

 }


 // vim: ts=4 et sw=4 si

fk20_hext_fft2hext
__global__ void fk20_hext_fft2hext(g1p_t *hext, const g1p_t *hext_fft)
hext_fft -> hext
Definition: fk20.cu:73

fk20_setup2xext_fft
__global__ void fk20_setup2xext_fft(g1p_t *xext_fft, const g1p_t *setup)
setup -> xext_fft
Definition: fk20.cu:27

fk20_h2h_fft
__global__ void fk20_h2h_fft(g1p_t *h_fft, const g1p_t *h)
h -> h_fft
Definition: fk20.cu:92

fk20.cuh

xext_fft
__managed__ g1p_t xext_fft[16][512]
Definition: fk20_testvector.cu:24603

hext_fft
__managed__ g1p_t hext_fft[512 *512]
Definition: fk20_testvector.cu:82052

h_fft
__managed__ g1p_t h_fft[512 *512]
Definition: fk20_testvector.cu:87178

h
__managed__ g1p_t h[512 *512]
Definition: fk20_testvector.cu:84615

setup
__managed__ g1p_t setup[4097]
Definition: fk20_testvector.cu:4115

fr.cuh

g1.cuh

g1p_ift
__device__ void g1p_ift(g1p_t *output, const g1p_t *input)
Inverse FFT of size 512 over G1 with projective coordinates. Input and output arrays may overlap....
Definition: g1p_fft.cu:178

g1p_inf
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
Definition: g1p.cu:93

g1p_fft
__device__ void g1p_fft(g1p_t *output, const g1p_t *input)
FFT of size 512 over G1 with projective coordinates. Input and output arrays may overlap....
Definition: g1p_fft.cu:24

g1p_cpy
__device__ __host__ void g1p_cpy(g1p_t &p, const g1p_t &q)
Copy from q into p.
Definition: g1p.cu:67

g1p_t
G1 point in projective coordinates.
Definition: g1.cuh:27