FK20 CUDA
fk20.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include <stdio.h>
6 
7 #include "fr.cuh"
8 #include "g1.cuh"
9 #include "fk20.cuh"
10 
11 // Workspace in shared memory
12 
13 //extern __shared__ fr_t fr_tmp[]; // 16 KiB shared memory
14 //extern __shared__ g1p_t g1p_tmp[]; // 72 KiB shared memory
15 
17 
/**
 * @brief setup -> xext_fft
 *
 * Builds 16 rows of 512 G1 points in xext_fft and hands them to g1p_fft().
 * Block `bid` owns row `bid`; thread `tid` writes elements `tid` and
 * `tid + 256` of that row:
 *
 *   row[bid][tid]       = setup[n - l - 1 - bid - l*tid]   (tid = 0..254)
 *   row[bid][255]       = point at infinity
 *   row[bid][256 + tid] = point at infinity                (zero extension)
 *
 * with n = 4096, l = 16, k = 256. This is the FK20 vector
 * x_i = [s[n-l-1-i], s[n-2l-1-i], ..., s[l-1-i], inf] extended to length 2k.
 *
 * The start index used to be `n - 1 - bid - l*tid`. For the launch shape
 * enforced below that value is never negative, so the point-at-infinity
 * branch was unreachable and every row was shifted by l setup points.
 * NOTE(review): an earlier TODO here reported a failing test ("probably bad
 * block indexing"); re-run that test against the corrected index.
 *
 * Launch configuration: exactly 16 blocks of 256 threads, one-dimensional.
 * Any other configuration returns immediately without writing anything.
 *
 * @param[out] xext_fft  array of 16*512 points; used both as scratch for the
 *                       extended rows and as the FFT output
 * @param[in]  setup     setup points; indices 0..n-l-1 are read
 */
__global__ void fk20_setup2xext_fft(g1p_t *xext_fft, const g1p_t *setup) {
    // The index arithmetic below is hard-wired to this launch shape.
    if (gridDim.x  != 16  || gridDim.y  != 1 || gridDim.z  != 1) return;
    if (blockDim.x != 256 || blockDim.y != 1 || blockDim.z != 1) return;

    const int n = 4096, l = 16, k = 256;

    // Signed on purpose: the source index must be able to go negative without
    // relying on unsigned wraparound followed by a narrowing conversion.
    const int tid = static_cast<int>(threadIdx.x); // Thread number
    const int bid = static_cast<int>(blockIdx.x);  // Block number

    g1p_t *xext = xext_fft; // rows are assembled in place in the output buffer

    const int input  = n - l - 1 - bid - l * tid; // negative only for tid == k-1
    const int output = 2 * k * bid + tid;

    // Part 1: reversed, stride-l slice of the setup, terminated by infinity

    if (input >= 0)
        g1p_cpy(xext[output], setup[input]);
    else
        g1p_inf(xext[output]);

    // Part 2: extend with points at infinity (upper half of the row)

    g1p_inf(xext[output + k]);

    // Every thread of the block reaches this barrier (the early returns above
    // are uniform per launch), so the whole row is written before the FFT.
    __syncthreads();

    // Part 3: G1 FFT, one FFT-512 per block (16 in total). g1p_fft() is
    // defined elsewhere and is expected to select this block's row itself.
    g1p_fft(xext_fft, xext);
}
59 
60 
61 
// The kernels below are syntactic sugar: thin __global__ wrappers around the device-side G1 FFT routines.
65 
/**
 * @brief hext_fft -> hext
 *
 * Kernel entry point that forwards both pointers unchanged to g1p_ift(),
 * the size-512 inverse FFT over G1 in projective coordinates. All indexing
 * is left to g1p_ift().
 *
 * NOTE(review): launch shape and array sizes are dictated by g1p_ift(),
 * which is defined elsewhere; presumably 512 points per block, as for
 * fk20_h2h_fft() -- confirm against g1p_fft.cu.
 *
 * @param[out] hext      destination array
 * @param[in]  hext_fft  source array
 */
__global__ void fk20_hext_fft2hext(g1p_t *hext, const g1p_t *hext_fft)
{
    g1p_ift(hext, hext_fft);
}
76 
78 
79 // fk20_h2h_fft(): h -> h_fft
80 
81 // parameters:
82 // - in h array with 512*gridDim.x elements
83 // - out h_fft array with 512*gridDim.x elements
84 
/**
 * @brief h -> h_fft
 *
 * Kernel entry point that forwards both pointers unchanged to g1p_fft(),
 * the size-512 FFT over G1 in projective coordinates. All indexing is left
 * to g1p_fft().
 *
 * @param[out] h_fft  destination array with 512*gridDim.x elements
 * @param[in]  h      source array with 512*gridDim.x elements
 */
__global__ void fk20_h2h_fft(g1p_t *h_fft, const g1p_t *h)
{
    g1p_fft(h_fft, h);
}
95 
96 // vim: ts=4 et sw=4 si
__global__ void fk20_hext_fft2hext(g1p_t *hext, const g1p_t *hext_fft)
hext_fft -> hext
Definition: fk20.cu:73
__global__ void fk20_setup2xext_fft(g1p_t *xext_fft, const g1p_t *setup)
setup -> xext_fft
Definition: fk20.cu:27
__global__ void fk20_h2h_fft(g1p_t *h_fft, const g1p_t *h)
h -> h_fft
Definition: fk20.cu:92
__managed__ g1p_t xext_fft[16][512]
__managed__ g1p_t hext_fft[512 *512]
__managed__ g1p_t h_fft[512 *512]
__managed__ g1p_t h[512 *512]
__managed__ g1p_t setup[4097]
__device__ void g1p_ift(g1p_t *output, const g1p_t *input)
Inverse FFT of size 512 over G1 with projective coordinates. Input and output arrays may overlap....
Definition: g1p_fft.cu:178
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
Definition: g1p.cu:93
__device__ void g1p_fft(g1p_t *output, const g1p_t *input)
FFT of size 512 over G1 with projective coordinates. Input and output arrays may overlap....
Definition: g1p_fft.cu:24
__device__ __host__ void g1p_cpy(g1p_t &p, const g1p_t &q)
Copy from q into p.
Definition: g1p.cu:67
G1 point in projective coordinates.
Definition: g1.cuh:27