FK20 CUDA
fp_x8.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fp.cuh"
6 #include "fp_x8.cuh"
7 
/**
 * @brief Multiplies x by 8 and stores the result into z.
 *
 * The six 64-bit limbs of x are handed to an inline PTX block as operands
 * %6..%11, the FP_X8 macro (from fp_x8.cuh) does the arithmetic on the PTX
 * registers x0..x5 / z0..z5, and the six result limbs come back through
 * operands %0..%5.
 *
 * Every limb of x is read into a local before any limb of z is written.
 * NOTE(review): this presumably makes calling with z and x referring to the
 * same object safe - confirm against callers.
 *
 * @param[out] z Receives the six result limbs.
 * @param[in]  x Six-limb input value.
 */
__device__ void fp_x8(fp_t &z, const fp_t &x) {
    // Snapshot the input limbs so the asm block works on plain values.
    uint64_t in0 = x[0];
    uint64_t in1 = x[1];
    uint64_t in2 = x[2];
    uint64_t in3 = x[3];
    uint64_t in4 = x[4];
    uint64_t in5 = x[5];

    // Result limbs; each one is written by the asm block below.
    uint64_t out0;
    uint64_t out1;
    uint64_t out2;
    uint64_t out3;
    uint64_t out4;
    uint64_t out5;

    asm volatile (
        // Braces open a PTX scope so the register declarations stay local.
        "\n\t{"
        "\n\t.reg .u64 x<6>, z<6>;"
        // z6 and gt are not referenced in this function; presumably they are
        // scratch registers required by FP_X8 - verify in fp_x8.cuh.
        "\n\t.reg .u32 z6;"
        "\n\t.reg .pred gt;"

        // Load the input operands (%6..%11) into the PTX registers x0..x5.
        "\n\tmov.u64 x0, %6;"
        "\n\tmov.u64 x1, %7;"
        "\n\tmov.u64 x2, %8;"
        "\n\tmov.u64 x3, %9;"
        "\n\tmov.u64 x4, %10;"
        "\n\tmov.u64 x5, %11;"

        // PTX macro for multiplication by 8; stores its result in z.
        FP_X8(z, x)

        // Copy the PTX registers z0..z5 to the output operands (%0..%5).
        "\n\tmov.u64 %0, z0;"
        "\n\tmov.u64 %1, z1;"
        "\n\tmov.u64 %2, z2;"
        "\n\tmov.u64 %3, z3;"
        "\n\tmov.u64 %4, z4;"
        "\n\tmov.u64 %5, z5;"

        "\n\t}"
        :
        "=l"(out0), "=l"(out1), "=l"(out2), "=l"(out3), "=l"(out4), "=l"(out5)
        :
        "l"(in0), "l"(in1), "l"(in2), "l"(in3), "l"(in4), "l"(in5)
    );

    // Publish the result limbs to the caller's output.
    z[0] = out0;
    z[1] = out1;
    z[2] = out2;
    z[3] = out3;
    z[4] = out4;
    z[5] = out5;
}
55 
56 // vim: ts=4 et sw=4 si
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
Definition: fp.cuh:14
__device__ void fp_x8(fp_t &z, const fp_t &x)
Multiplies x by 8 and stores the result into z.
Definition: fp_x8.cu:15
#define FP_X8(Z, X)
PTX macro for multiplication by 8. Stores in Z.
Definition: fp_x8.cuh:11