FK20 CUDA
fp_add.cuh
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #ifndef FP_ADD
6 
/**
 * @brief PTX fragment: multi-limb addition z = x + y followed by conditional
 *        subtraction of m and of 9m.
 *
 * Expands to a run of adjacent string literals meant to be pasted into an
 * inline `asm` statement. Z, X and Y are register-name prefixes; limb i of an
 * operand is the 64-bit register `<prefix>i` for i = 0..5, with limb 0 at the
 * start of the carry chain (least significant).
 *
 * The macro declares no registers itself. Besides the Z/X/Y limbs, the
 * enclosing asm block has to provide:
 *   - `z6`  : 32-bit scratch register that receives the carry out of limb 5
 *             (fixed name, independent of Z);
 *   - `gt`, `nz` : predicate registers.
 * All three are overwritten, as is the condition-code (carry/borrow) flag.
 *
 * Steps:
 *   1. Z = X + Y over six limbs; the final carry goes to z6.
 *   2. gt = (Z5 > top limb of m), nz = (z6 != 0). Both predicates are taken
 *      from the raw sum, before any subtraction.
 *   3. If gt, subtract m from Z. If nz, subtract 9m from Z.
 *
 * NOTE(review): which input ranges are expected and how far the result is
 * reduced cannot be determined from this macro alone; confirm with callers.
 *
 * @param Z  prefix of the destination limbs
 * @param X  prefix of the first operand's limbs
 * @param Y  prefix of the second operand's limbs
 */
#define FP_ADD(Z, X, Y) \
    /* z = x + y; carry out of the top limb is captured in z6 */ \
    \
    "\n\tadd.u64.cc "#Z"0, "#X"0, "#Y"0;" \
    "\n\taddc.u64.cc "#Z"1, "#X"1, "#Y"1;" \
    "\n\taddc.u64.cc "#Z"2, "#X"2, "#Y"2;" \
    "\n\taddc.u64.cc "#Z"3, "#X"3, "#Y"3;" \
    "\n\taddc.u64.cc "#Z"4, "#X"4, "#Y"4;" \
    "\n\taddc.u64.cc "#Z"5, "#X"5, "#Y"5;" \
    "\n\taddc.u32 z6, 0, 0;" \
    \
    /* gt = (z>>320) > (m>>320) */ \
    /* nz = (z>>384) > 0 */ \
    /* The comparison must look at the destination's own top limb, so the */ \
    /* register name is built from Z rather than spelled out literally.   */ \
    \
    "\n\tsetp.gt.u64 gt, "#Z"5, 0x1a0111ea397fe69aU;" \
    "\n\tsetp.ne.u32 nz, z6, 0;" \
    \
    /* If gt then subtract m */ \
    \
    "\n@gt\tsub.u64.cc "#Z"0, "#Z"0, 0xb9feffffffffaaabU;" \
    "\n@gt\tsubc.u64.cc "#Z"1, "#Z"1, 0x1eabfffeb153ffffU;" \
    "\n@gt\tsubc.u64.cc "#Z"2, "#Z"2, 0x6730d2a0f6b0f624U;" \
    "\n@gt\tsubc.u64.cc "#Z"3, "#Z"3, 0x64774b84f38512bfU;" \
    "\n@gt\tsubc.u64.cc "#Z"4, "#Z"4, 0x4b1ba7b6434bacd7U;" \
    "\n@gt\tsubc.u64.cc "#Z"5, "#Z"5, 0x1a0111ea397fe69aU;" \
    \
    /* If nz then subtract mmu0 (= 9m) */ \
    \
    "\n@nz\tsub.u64.cc "#Z"0, "#Z"0, 0x89f6fffffffd0003U;" \
    "\n@nz\tsubc.u64.cc "#Z"1, "#Z"1, 0x140bfff43bf3fffdU;" \
    "\n@nz\tsubc.u64.cc "#Z"2, "#Z"2, 0xa0b767a8ac38a745U;" \
    "\n@nz\tsubc.u64.cc "#Z"3, "#Z"3, 0x8831a7ac8fada8baU;" \
    "\n@nz\tsubc.u64.cc "#Z"4, "#Z"4, 0xa3f8e5685da91392U;" \
    "\n@nz\tsubc.u64.cc "#Z"5, "#Z"5, 0xea09a13c057f1b6cU;"
45 
46 #endif