FK20 CUDA
fp_sqr.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include "fp.cuh"
6 #include "fp_sqr.cuh"
7 #include "fp_reduce12.cuh"
8 
/**
 * @brief Computes the square of the residue x modulo p and stores it in z.
 *
 * The six 64-bit words of x are copied into PTX registers, squared with the
 * FP_SQR macro into the register set u (u0..u9, ua, ub), passed through
 * FP_REDUCE12, and the words u0..u5 are then written to z.
 *
 * All words of x are read before any word of z is written, and z is never
 * read, so z does not need to be initialized by the caller.
 *
 * @param[out] z Receives the six result words.
 * @param[in]  x Residue to square.
 */
__device__ void fp_sqr(fp_t &z, const fp_t &x) {
    // Snapshot the input words; they are bound below as read-only ("l") operands.
    const uint64_t
        x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5];

    // Result words. They are bound as write-only ("=l") operands, so they are
    // deliberately left uninitialized rather than loaded from z: any value
    // read from z would be discarded by the asm statement anyway.
    uint64_t z0, z1, z2, z3, z4, z5;

    asm volatile (
    "\n\t{"
    // Scratch registers, scoped to this PTX block by the surrounding braces.
    // NOTE(review): z<6> is not referenced by the asm text visible here; it is
    // kept in case FP_SQR / FP_REDUCE12 rely on it -- confirm before removing.
    "\n\t.reg .u64 z<6>, x<6>;"
    "\n\t.reg .u64 u<10>, ua, ub;"
    "\n\t.reg .u64 q<8>;"
    "\n\t.reg .u64 r<7>;"

    // Operands %6..%11 are the inputs x0..x5.
    "\n\tmov.u64 x0, %6;"
    "\n\tmov.u64 x1, %7;"
    "\n\tmov.u64 x2, %8;"
    "\n\tmov.u64 x3, %9;"
    "\n\tmov.u64 x4, %10;"
    "\n\tmov.u64 x5, %11;"

// Square x into u, then apply the wide reduction over 12 words to u.
FP_SQR(u, x)
FP_REDUCE12(u)

    // Operands %0..%5 are the outputs z0..z5; the result is taken from u0..u5.
    "\n\tmov.u64 %0, u0;"
    "\n\tmov.u64 %1, u1;"
    "\n\tmov.u64 %2, u2;"
    "\n\tmov.u64 %3, u3;"
    "\n\tmov.u64 %4, u4;"
    "\n\tmov.u64 %5, u5;"

    "\n\t}"
    :
    "=l"(z0), "=l"(z1), "=l"(z2), "=l"(z3), "=l"(z4), "=l"(z5)
    :
    "l"(x0), "l"(x1), "l"(x2), "l"(x3), "l"(x4), "l"(x5)
    );

    // Publish the result only after the whole computation has finished.
    z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4; z[5] = z5;
}
54 
55 // vim: ts=4 et sw=4 si
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
Definition: fp.cuh:14
#define FP_REDUCE12(Z)
Wide reduction over 12 words.
Definition: fp_reduce12.cuh:12
__device__ void fp_sqr(fp_t &z, const fp_t &x)
Computes the square of the residue x modulo p and stores it in z.
Definition: fp_sqr.cu:16
#define FP_SQR(Z, X)
PTX macro for computing the square of the residue x modulo p. Stores in z.
Definition: fp_sqr.cuh:12