17 printf(
"=== RUN %s\n", __func__);
50 printf(
"(%d,%d,%d,%d): FAILED\n", i, j, k, l);
58 printf(
"%ld tests\n", count);
__device__ bool fp_neq(const fp_t &x, const fp_t &y)
Compares two fp_t residues.
__device__ void fp_mma(fp_t &z, const fp_t &v, const fp_t &w, const fp_t &x, const fp_t &y)
Fp multiply-multiply-add. Fast execution of z = (v*w + x*y) mod p. The double-wide products are added ...
__device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y)
Computes the sum of two residues x and y modulo p and stores it in z. Device-only function.
__device__ void fp_mul(fp_t &z, const fp_t &x, const fp_t &y)
Multiplies two Fp residues x and y, stores in z.
__device__ __host__ void fp_cpy(fp_t &z, const fp_t &x)
Copy from x into z.
__managed__ testval_t testval[TESTVALS]
__global__ void FpTestMMA(testval_t *testval)
Test for multiply-multiply-add. Compare with current standalone implementation of multiplication and ...