FK20 CUDA
fk20_poly2toeplitz_coefficients.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fr.cuh"
6 #include "g1.cuh"
7 #include "fk20.cuh"
8 
24 
25  // gridDim.x is the number of rows
26  if (gridDim.y != 1) return;
27  if (gridDim.z != 1) return;
28  if (blockDim.x != 256) return;
29  if (blockDim.y != 1) return;
30  if (blockDim.z != 1) return;
31 
32  unsigned tid = threadIdx.x; // Thread number
33  unsigned bid = blockIdx.x; // Block number
34 
35  polynomial += 4096 * bid;
36  toeplitz_coefficients += 8192 * bid;
37 
38  for (int i=0; i<16; i++) {
39 
40  // Copy from the polynomial into half of the coefficient array
41 
42  unsigned src = tid*16+15-i;
43  unsigned dst = (tid+257)%512 + 512*i;
44 
45  if (tid > 0)
47  else
49 
50  __syncwarp(0xffffffff);
51 
52  // Zero the other half of coefficients before FFT
53 
54  fr_zero(toeplitz_coefficients[512*i+tid+1]);
55  }
56 }
57 
58 // vim: ts=4 et sw=4 si
__managed__ fr_t toeplitz_coefficients[512 *16][512]
__managed__ fr_t polynomial[512 *4096]
__global__ void fk20_poly2toeplitz_coefficients(fr_t *toeplitz_coefficients, const fr_t *polynomial)
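polynomial -> toeplitz_coefficients: for each row, copies the polynomial into half of the coefficient array and zeroes the other half before the FFT.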
__device__ __host__ void fr_zero(fr_t &z)
Sets the value of z to zero.
Definition: fr.cu:15
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t).
Definition: fr.cuh:24
__device__ __host__ void fr_cpy(fr_t &z, const fr_t &x)
Copy from x into z.
Definition: fr_cpy.cu:14