FK20 CUDA
fptest_mma.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fp.cuh"
6 #include "fptest.cuh"
7 
/**
 * @brief Test for multiply-multiply-add.
 *
 * For index quadruples i < j < k < l into testval (with i starting at 880),
 * computes testval[i]*testval[j] + testval[k]*testval[l] in two ways:
 * with the standalone fp_mul and fp_add, and with a single fp_mma call.
 * The two results are compared with fp_neq; every mismatch is reported and
 * makes the test fail. The number of comparisons and the verdict are printed
 * at the end.
 *
 * The kernel does no thread indexing, so every launched thread runs the
 * complete test on its own.
 *
 * @param[in] testval array of test values, read at indices 880..TESTVALS-1
 */
__global__ void FpTestMMA(testval_t *testval) {

    printf("=== RUN %s\n", __func__);

    bool    pass    = true;
    size_t  count   = 0;

    // NOTE(review): the outer index starts at 880 instead of 0, presumably to
    // keep the running time of the four nested loops bounded - confirm.
    for (int i=880; i<TESTVALS; i++) {
        uint64_t v[6];

        fp_cpy(v, testval[i]);

        for (int j=i+1; j<TESTVALS; j++) {
            uint64_t w[6], t[6];

            fp_cpy(w, testval[j]);
            fp_mul(t, v, w);    // t = v*w, reused for every (k,l) pair below

            for (int k=j+1; k<TESTVALS; k++) {
                uint64_t x[6];

                fp_cpy(x, testval[k]);

                for (int l=k+1; l<TESTVALS; l++) {
                    uint64_t y[6], u[6];

                    fp_cpy(y, testval[l]);

                    // Reference value: u = x*y + v*w
                    fp_mul(u, x, y);
                    fp_add(u, u, t);

                    // Value under test. The result is written to y, which is
                    // also the last input operand of the call.
                    fp_mma(y, v, w, x, y);

                    if (fp_neq(u, y)) {
                        pass = false;

                        printf("(%d,%d,%d,%d): FAILED\n", i, j, k, l);
                    }

                    ++count;
                }
            }
        }
    }

    // count is a size_t: print it as unsigned, with a width that does not
    // depend on the size of long on the platform.
    printf("%llu tests\n", (unsigned long long)count);

    PRINTPASS(pass);
}
62 
63 // vim: ts=4 et sw=4 si
__device__ bool fp_neq(const fp_t &x, const fp_t &y)
Compares two fp_t residues.
Definition: fp_neq.cu:14
__device__ void fp_mma(fp_t &z, const fp_t &v, const fp_t &w, const fp_t &x, const fp_t &y)
Fp multiply-multiply-add. Fast execution of z = (v*w + x*y) mod p The double-wide products are added ...
Definition: fp_mma.cu:20
__device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y)
Computes the sum of two residues x and y modulo p and stores it in z. Device only function.
Definition: fp_add.cu:17
__device__ void fp_mul(fp_t &z, const fp_t &x, const fp_t &y)
Multiplies two Fp residues x and y, stores in z.
Definition: fp_mul.cu:17
__device__ __host__ void fp_cpy(fp_t &z, const fp_t &x)
Copy from x into z.
Definition: fp_cpy.cu:14
__managed__ testval_t testval[TESTVALS]
Definition: fptest.cu:8
#define TESTVALS
Definition: fptest.cuh:13
__global__ void FpTestMMA(testval_t *testval)
Test for multiply-multiply-add. Compare with current standalone implementation of multiplication and ...
Definition: fptest_mma.cu:15
#define PRINTPASS(pass)
Definition: test.h:25