FK20 CUDA
fp_add.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fp.cuh"
6 #include "fp_add.cuh"
7 
/**
 * @brief Computes the sum of two residues x and y modulo p and stores it in z.
 *        Device-only function.
 *
 * Every limb of x and y is copied into a local variable before any limb of z
 * is written, so the stores at the end of the function cannot disturb the
 * inputs even when z refers to the same array as x or y.
 *
 * @param[out] z Receives the six 64-bit limbs of the result.
 * @param[in]  x First operand, six 64-bit limbs.
 * @param[in]  y Second operand, six 64-bit limbs.
 */
17 __device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y) {
// Per-limb locals: xN/yN hold the inputs, zN receive the asm outputs.
18  uint64_t
19  x0 = x[0], y0 = y[0], z0,
20  x1 = x[1], y1 = y[1], z1,
21  x2 = x[2], y2 = y[2], z2,
22  x3 = x[3], y3 = y[3], z3,
23  x4 = x[4], y4 = y[4], z4,
24  x5 = x[5], y5 = y[5], z5;
25 
26  asm volatile (
// Open a PTX scope so the register names declared below stay local to this asm block.
27  "\n\t{"
// z<6>, x<6>, y<6> declare the PTX registers z0..z5, x0..x5 and y0..y5.
28  "\n\t.reg .u64 z<6>, x<6>, y<6>;"
// z6, gt and nz are not referenced anywhere else in this function;
// presumably they are scratch registers used by FP_ADD (confirm in fp_add.cuh).
29  "\n\t.reg .u32 z6;"
30  "\n\t.reg .pred gt, nz;"
31 
// Operands %6..%11 are the C++ inputs x0..x5; copy them into PTX registers x0..x5.
32  "\n\tmov.u64 x0, %6;"
33  "\n\tmov.u64 x1, %7;"
34  "\n\tmov.u64 x2, %8;"
35  "\n\tmov.u64 x3, %9;"
36  "\n\tmov.u64 x4, %10;"
37  "\n\tmov.u64 x5, %11;"
38 
// Operands %12..%17 are the C++ inputs y0..y5; copy them into PTX registers y0..y5.
39  "\n\tmov.u64 y0, %12;"
40  "\n\tmov.u64 y1, %13;"
41  "\n\tmov.u64 y2, %14;"
42  "\n\tmov.u64 y3, %15;"
43  "\n\tmov.u64 y4, %16;"
44  "\n\tmov.u64 y5, %17;"
45 
// FP_ADD is the PTX macro from fp_add.cuh that performs the modular addition
// (Z <- X+Y). It is assumed to address the registers declared above through
// the z/x/y name prefixes passed here.
46 FP_ADD(z, x, y)
47 
// Operands %0..%5 are the C++ outputs z0..z5; copy PTX registers z0..z5 into them.
48  "\n\tmov.u64 %0, z0;"
49  "\n\tmov.u64 %1, z1;"
50  "\n\tmov.u64 %2, z2;"
51  "\n\tmov.u64 %3, z3;"
52  "\n\tmov.u64 %4, z4;"
53  "\n\tmov.u64 %5, z5;"
54 
55  "\n\t}"
// Output operands (%0..%5): "=l" binds each to a write-only 64-bit register.
56  :
57  "=l"(z0), "=l"(z1), "=l"(z2), "=l"(z3), "=l"(z4), "=l"(z5)
// Input operands (%6..%17): "l" binds each to a 64-bit register.
58  :
59  "l"(x0), "l"(x1), "l"(x2), "l"(x3), "l"(x4), "l"(x5),
60  "l"(y0), "l"(y1), "l"(y2), "l"(y3), "l"(y4), "l"(y5)
61  );
62 
// Write the result limbs back to the caller's array.
63  z[0] = z0, z[1] = z1, z[2] = z2, z[3] = z3, z[4] = z4, z[5] = z5;
64 }
65 
66 // vim: ts=4 et sw=4 si
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
Definition: fp.cuh:14
__device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y)
Computes the sum of two residues x and y modulo p and stores it in z. Device only function.
Definition: fp_add.cu:17
#define FP_ADD(Z, X, Y)
PTX macro for addition of two residues modulo p. Z←X+Y.
Definition: fp_add.cuh:11