FK20 CUDA
fk20test_fft.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include <cstring>
6 #include <stdio.h>
7 #include <time.h>
8 
9 #include "fr.cuh"
10 #include "g1.cuh"
11 #include "fk20.cuh"
12 #include "fk20test.cuh"
13 #include "fk20_testvector.cuh"
14 
15 // GPU arrays for results and comparison with known answers
16 static __managed__ uint8_t cmp[16 * 512];
17 static __managed__ fr_t fr_tmp[16 * 512];
18 static __managed__ g1p_t g1p_tmp[512];
19 
20 
24 void FK20TestFFT() {
25  printf(">>>> FFT Tests\n");
26  // All tests operate on a local pointer to the global KAT arrays
28  h2h_fft(h, h_fft);
29  h_fft2h(h_fft, h);
32 }
33 
34 /*
35 The testing functions follow an common template, described in ./doc/fk20test.md
36 */
37 
44 void toeplitz_coefficients2toeplitz_coefficients_fft(fr_t toeplitz_coefficients_l[16][512],
45  fr_t toeplitz_coefficients_fft_l[16][512]) {
46  cudaError_t err;
47  bool pass = true;
48  CLOCKINIT;
49 
50  printf("=== RUN %s\n", "fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft");
51  memset(fr_tmp, 0xAA, 16 * 512 * sizeof(fr_t)); // Pattern on tmp dest.
52  for(int testIDX=0; testIDX<=1; testIDX++){
53  CLOCKSTART;
54  fr_fft_wrapper<<<16, 256, fr_sharedmem>>>(fr_tmp, (fr_t *)(toeplitz_coefficients_l));
55  CUDASYNC("fr_fft_wrapper");
56  CLOCKEND;
57  clearRes;
58  fr_eq_wrapper<<<256, 32>>>(cmp, 16 * 512, fr_tmp, (fr_t *)toeplitz_coefficients_fft_l);
59  CUDASYNC("fr_eq_wrapper");
60  if (testIDX == 0){
61  CMPCHECK(16 * 512)
62  PRINTPASS(pass);
63  }
64  else{
65  NEGCMPCHECK(16*512);
66  NEGPRINTPASS(pass);
67  }
68 
69  varMangle((fr_t*)toeplitz_coefficients_l, 16*512, 512);
70  }
71 }
72 
79 void h2h_fft(g1p_t h_l[512], g1p_t h_fft_l[512]) {
80  cudaError_t err;
81  bool pass = true;
82  CLOCKINIT;
83 
85  printf("=== RUN %s\n", "g1p_fft: h -> h_fft");
86  memset(g1p_tmp, 0xAA, 512 * sizeof(g1p_t)); // Pattern on tmp dest
87 
88  for(int testIDX=0; testIDX<=1; testIDX++){
89  CLOCKSTART;
90  g1p_fft_wrapper<<<1, 256, g1p_sharedmem>>>(g1p_tmp, h_l);
91  CUDASYNC("g1p_fft_wrapper");
92  CLOCKEND;
93  clearRes;
94  g1p_eq_wrapper<<<16, 32>>>(cmp, 512, g1p_tmp, h_fft_l);
95  CUDASYNC("g1p_eq_wrapper");
96 
97  if (testIDX == 0){
98  CMPCHECK(512)
99  PRINTPASS(pass);
100  }
101  else{
102  NEGCMPCHECK(512);
103  NEGPRINTPASS(pass);
104  }
105  varMangle(h_l, 512, 64);
106  }
107 }
108 
115 void h_fft2h(g1p_t h_fft_l[512], g1p_t h_l[512]) {
116  cudaError_t err;
117  bool pass = true;
118  CLOCKINIT;
119 
121  printf("=== RUN %s\n", "g1p_ift: h_fft -> h");
122  memset(g1p_tmp, 0xAA, 512 * sizeof(g1p_t)); // Pattern on tmp dest
123  for(int testIDX=0; testIDX<=1; testIDX++){
124  CLOCKSTART;
125  g1p_ift_wrapper<<<1, 256, g1p_sharedmem>>>(g1p_tmp, h_fft_l);
126  CUDASYNC("g1p_ift_wrapper");
127  CLOCKEND;
128  clearRes;
129  g1p_eq_wrapper<<<16, 32>>>(cmp, 512, g1p_tmp, h_l);
130  CUDASYNC("g1p_eq_wrapper");
131  // Check IFT result
132  if (testIDX == 0){
133  CMPCHECK(512)
134  PRINTPASS(pass);
135  }
136  else{
137  NEGCMPCHECK(512);
138  NEGPRINTPASS(pass);
139  }
140  varMangle(h_fft_l, 512, 64);
141  }
142 }
143 
150 void hext_fft2h(g1p_t hext_fft_l[512], g1p_t h_l[512]){
151  cudaError_t err;
152  bool pass = true;
153  CLOCKINIT;
154 
156  printf("=== RUN %s\n", "g1p_ift: hext_fft -> h");
157  memset(g1p_tmp,0xAA,512*sizeof(g1p_t)); // Pattern on tmp dest
158  for(int testIDX=0; testIDX<=1; testIDX++){
159  CLOCKSTART;
160  g1p_ift_wrapper<<<1, 256, g1p_sharedmem>>>(g1p_tmp, hext_fft_l);
161  CUDASYNC("g1p_ift_wrapper");
162  CLOCKEND;
163  clearRes;
164  g1p_eq_wrapper<<<8, 32>>>(cmp, 256, g1p_tmp, h_l); // Note: h, not hext, hence 256, not 512
165  CUDASYNC("g1p_eq_wrapper");
166  if (testIDX == 0){
167  CMPCHECK(256)
168  PRINTPASS(pass);
169  }
170  else{
171  NEGCMPCHECK(256);
172  NEGPRINTPASS(pass);
173  }
174  varMangle(hext_fft_l, 512, 64);
175  }
176 }
177 
184 void hext_fft2h_fft(g1p_t hext_fft_l[512], g1p_t h_fft_l[512]){
185  cudaError_t err;
186  bool pass = true;
187  CLOCKINIT;
188 
190  printf("=== RUN %s\n", "fk20_hext_fft2h_fft: hext_fft -> h_fft");
191  memset(g1p_tmp,0x88,512*sizeof(g1p_t)); // Pattern on tmp dest
192  for(int testIDX=0; testIDX<=1; testIDX++){
193  CLOCKSTART;
194  fk20_hext_fft2h_fft<<<1, 256, g1p_sharedmem>>>(g1p_tmp, hext_fft_l);
195  CUDASYNC("fk20_hext_fft2h_fft");
196  CLOCKEND;
197  clearRes;
198  g1p_eq_wrapper<<<16, 32>>>(cmp, 512, g1p_tmp, h_fft_l);
199  CUDASYNC("g1p_eq_wrapper");
200 
201  // Check FFT result
202  if (testIDX == 0){
203  CMPCHECK(512)
204  PRINTPASS(pass);
205  }
206  else{
207  NEGCMPCHECK(512);
208  NEGPRINTPASS(pass);
209  }
210  varMangle(hext_fft_l, 512, 64);
211  }
212 }
213 
214 // vim: ts=4 et sw=4 si
const size_t g1p_sharedmem
Definition: fk20.cuh:14
#define SET_SHAREDMEM(SZ, FN)
Definition: fk20.cuh:48
#define CUDASYNC(fmt,...)
Definition: fk20.cuh:39
__global__ void fk20_hext_fft2h_fft(g1p_t *h_fft, const g1p_t *hext_fft)
hext_fft -> h_fft
__managed__ g1p_t hext_fft[512 *512]
__managed__ g1p_t h_fft[512 *512]
__managed__ fr_t toeplitz_coefficients[512 *16][512]
__managed__ fr_t toeplitz_coefficients_fft[512 *16][512]
void varMangle(fr_t *target, size_t size, unsigned step)
swap elements at positions multiple of step. Nondestructive, call a second time to undo the changes
__managed__ g1p_t h[512 *512]
__managed__ uint8_t cmp[16 *512]
void h_fft2h(g1p_t h_fft_l[512], g1p_t h_l[512])
Test for g1p_ift: h_fft -> h.
void FK20TestFFT()
Definition: fk20test_fft.cu:24
void h2h_fft(g1p_t h_l[512], g1p_t h_fft_l[512])
Test for g1p_fft: h -> h_fft.
Definition: fk20test_fft.cu:79
void hext_fft2h_fft(g1p_t hext_fft_l[512], g1p_t h_fft_l[512])
Test for fk20_hext_fft2h_fft: hext_fft -> h_fft.
void toeplitz_coefficients2toeplitz_coefficients_fft(fr_t toeplitz_coefficients_l[16][512], fr_t toeplitz_coefficients_fft_l[16][512])
Test for fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft.
Definition: fk20test_fft.cu:44
void hext_fft2h(g1p_t hext_fft_l[512], g1p_t h_l[512])
Test for g1p_ift: hext_fft -> h.
uint64_t fr_t[4]
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t)....
Definition: fr.cuh:24
__global__ void g1p_fft_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_fft: FFT for arrays of g1p_t with length 512
Definition: g1p_fft.cu:336
__global__ void g1p_ift_wrapper(g1p_t *output, const g1p_t *input)
wrapper for g1p_ift: inverse FFT for arrays of g1p_t with length 512
Definition: g1p_fft.cu:349
__shared__ g1p_t g1p_tmp[]
G1 point in projective coordinates.
Definition: g1.cuh:27
#define CLOCKINIT
Definition: test.h:98
#define NEGCMPCHECK(LENGTH)
Definition: test.h:116
#define clearRes
Definition: test.h:87
#define PRINTPASS(pass)
Definition: test.h:25
#define CLOCKEND
Definition: test.h:100
#define CLOCKSTART
Definition: test.h:99
#define NEGPRINTPASS(pass)
Definition: test.h:32
#define CMPCHECK(LENGTH)
Definition: test.h:106