FK20 CUDA
fp_sqr.cuh
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #ifndef FP_SQR
6 
/**
 * FP_SQR(Z, X): PTX text that computes the full 12-limb square of a 6-limb value.
 *
 * The macro expands to a run of adjacent string literals (one PTX line each);
 * presumably it is meant to be pasted into an inline asm statement - confirm at
 * the call sites.
 *
 * Z  Register-name prefix of the result. The fragment writes the twelve u64
 *    registers Z0..Z9, Za, Zb (Z0 is the least significant limb).
 * X  Register-name prefix of the operand. The fragment reads the six u64
 *    registers X0..X5 (X0 is the least significant limb).
 *
 * Method: sum the products X[i]*X[j] with i < j, double that sum, then add the
 * squares X[i]*X[i]. Only mul/mad/add instructions are issued, so the result is
 * the plain integer square; no reduction of any kind happens here.
 *
 * Constraints visible from the instruction sequence:
 *  - Z5 and Z6 are written by the very first line while X0..X5 are still read
 *    on the last line, so Z and X must name different registers.
 *  - The .cc forms are used throughout, so the carry flag is overwritten.
 *  - Each carry chain ends with a non-.cc madc into a fresh top limb; the high
 *    half of a 64x64 product is at most 2^64 - 2, so absorbing one carry there
 *    cannot overflow.
 *
 * NOTE(review): the PTX ISA manual spells the carry forms with .cc before the
 * type (add.cc.u64, madc.lo.cc.u64); here .cc follows the type. Presumably the
 * assembler in use accepts this order - confirm.
 */
12 #define FP_SQR(Z, X) \
13 /* Step 1: off-diagonal products X[i]*X[j], i < j, grouped by j - i. Group j - i = 5: X0*X5 into Z5:Z6. */ \
14  "\n\tmul.lo.u64 "#Z"5, "#X"0, "#X"5 ; mul.hi.u64 "#Z"6, "#X"0, "#X"5 ;" \
15 /* Group j - i = 4: X0*X4, X1*X5; Z4 and Z7 are written for the first time. */ \
16  "\n\tmul.lo.u64 "#Z"4, "#X"0, "#X"4 ; mad.hi.u64.cc "#Z"5, "#X"0, "#X"4, "#Z"5;" \
17  "\n\tmadc.lo.u64.cc "#Z"6, "#X"1, "#X"5, "#Z"6; madc.hi.u64 "#Z"7, "#X"1, "#X"5, 0;" \
18 /* Group j - i = 3: X0*X3, X1*X4, X2*X5; Z3 and Z8 are written for the first time. */ \
19  "\n\tmul.lo.u64 "#Z"3, "#X"0, "#X"3 ; mad.hi.u64.cc "#Z"4, "#X"0, "#X"3, "#Z"4;" \
20  "\n\tmadc.lo.u64.cc "#Z"5, "#X"1, "#X"4, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"1, "#X"4, "#Z"6;" \
21  "\n\tmadc.lo.u64.cc "#Z"7, "#X"2, "#X"5, "#Z"7; madc.hi.u64 "#Z"8, "#X"2, "#X"5, 0;" \
22 /* Group j - i = 2: X0*X2, X1*X3, X2*X4, X3*X5; Z2 and Z9 are written for the first time. */ \
23  "\n\tmul.lo.u64 "#Z"2, "#X"0, "#X"2 ; mad.hi.u64.cc "#Z"3, "#X"0, "#X"2, "#Z"3;" \
24  "\n\tmadc.lo.u64.cc "#Z"4, "#X"1, "#X"3, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"1, "#X"3, "#Z"5;" \
25  "\n\tmadc.lo.u64.cc "#Z"6, "#X"2, "#X"4, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"2, "#X"4, "#Z"7;" \
26  "\n\tmadc.lo.u64.cc "#Z"8, "#X"3, "#X"5, "#Z"8; madc.hi.u64 "#Z"9, "#X"3, "#X"5, 0;" \
27 /* Group j - i = 1: X0*X1, X1*X2, X2*X3, X3*X4, X4*X5; Z1 and Za are written for the first time. */ \
28  "\n\tmul.lo.u64 "#Z"1, "#X"0, "#X"1 ; mad.hi.u64.cc "#Z"2, "#X"0, "#X"1, "#Z"2;" \
29  "\n\tmadc.lo.u64.cc "#Z"3, "#X"1, "#X"2, "#Z"3; madc.hi.u64.cc "#Z"4, "#X"1, "#X"2, "#Z"4;" \
30  "\n\tmadc.lo.u64.cc "#Z"5, "#X"2, "#X"3, "#Z"5; madc.hi.u64.cc "#Z"6, "#X"2, "#X"3, "#Z"6;" \
31  "\n\tmadc.lo.u64.cc "#Z"7, "#X"3, "#X"4, "#Z"7; madc.hi.u64.cc "#Z"8, "#X"3, "#X"4, "#Z"8;" \
32  "\n\tmadc.lo.u64.cc "#Z"9, "#X"4, "#X"5, "#Z"9; madc.hi.u64 "#Z"a, "#X"4, "#X"5, 0;" \
33 /* Step 2: double the off-diagonal sum held in Z1..Za by adding it to itself; the final carry becomes Zb. */ \
34  "\n\tadd.u64.cc "#Z"1, "#Z"1, "#Z"1;" \
35  "\n\taddc.u64.cc "#Z"2, "#Z"2, "#Z"2;" \
36  "\n\taddc.u64.cc "#Z"3, "#Z"3, "#Z"3;" \
37  "\n\taddc.u64.cc "#Z"4, "#Z"4, "#Z"4;" \
38  "\n\taddc.u64.cc "#Z"5, "#Z"5, "#Z"5;" \
39  "\n\taddc.u64.cc "#Z"6, "#Z"6, "#Z"6;" \
40  "\n\taddc.u64.cc "#Z"7, "#Z"7, "#Z"7;" \
41  "\n\taddc.u64.cc "#Z"8, "#Z"8, "#Z"8;" \
42  "\n\taddc.u64.cc "#Z"9, "#Z"9, "#Z"9;" \
43  "\n\taddc.u64.cc "#Z"a, "#Z"a, "#Z"a;" \
44  "\n\taddc.u64 "#Z"b, 0, 0;" \
45 /* Step 3: add the squares X[i]*X[i] in one carry chain; Z0 is written here for the first time, and the last madc folds the carry into Zb. */ \
46  "\n\tmul.lo.u64 "#Z"0, "#X"0, "#X"0 ; mad.hi.u64.cc "#Z"1, "#X"0, "#X"0, "#Z"1;" \
47  "\n\tmadc.lo.u64.cc "#Z"2, "#X"1, "#X"1, "#Z"2; madc.hi.u64.cc "#Z"3, "#X"1, "#X"1, "#Z"3;" \
48  "\n\tmadc.lo.u64.cc "#Z"4, "#X"2, "#X"2, "#Z"4; madc.hi.u64.cc "#Z"5, "#X"2, "#X"2, "#Z"5;" \
49  "\n\tmadc.lo.u64.cc "#Z"6, "#X"3, "#X"3, "#Z"6; madc.hi.u64.cc "#Z"7, "#X"3, "#X"3, "#Z"7;" \
50  "\n\tmadc.lo.u64.cc "#Z"8, "#X"4, "#X"4, "#Z"8; madc.hi.u64.cc "#Z"9, "#X"4, "#X"4, "#Z"9;" \
51  "\n\tmadc.lo.u64.cc "#Z"a, "#X"5, "#X"5, "#Z"a; madc.hi.u64 "#Z"b, "#X"5, "#X"5, "#Z"b;"
52 
53 #endif
54 // vim: ts=4 et sw=4 si