FK20 CUDA
g1p_addsub.cu
Go to the documentation of this file.
1 // bls12_381: Arithmetic for BLS12-381
2 // Copyright 2022-2023 Dag Arne Osvik
3 // Copyright 2022-2023 Luan Cardoso dos Santos
4 
5 #include <stdio.h>
6 
7 #include "fp.cuh"
8 #include "g1.cuh"
9 
// g1p_addsub: replaces the pair (p, q) with their sum and difference, in place.
//
// Parameters (both are read and overwritten):
//   p - projective G1 point; on return holds the first result
//       (p + q according to the generated documentation of this function).
//   q - projective G1 point; on return holds the second result
//       (reviewer's reading of the arithmetic below: p - q -- TODO confirm).
//
// In builds without NDEBUG, both inputs are first validated with
// g1p_isPoint(); if either check fails, every coordinate of p and q is set
// to zero and the function returns without doing any arithmetic.
//
// Notation used in the line comments: x1,y1,z1 and x2,y2,z2 are the
// coordinate values of p and q on entry. X1..Z2 are references to the live
// storage of p and q, which is reused as scratch space, so a tag such as
// "tc" names a value, not a variable. Upper-case tags (T4, T7, Tc, ...)
// belong to the second result, lower-case tags to the first.
//
// NOTE(review): the operation sequence looks like the complete projective
// addition formulas for a = 0 curves with 3*b = 12, evaluated once for q and
// once for -q while sharing common products -- confirm against the reference.
// NOTE(review): presumably p and q must be distinct objects, since each is
// overwritten while the other is still being read -- confirm with callers.
// NOTE(review): several fp_* calls pass the destination as an input too
// (e.g. the fp_mma calls writing Z2, X1 and Z1); assumes those helpers
// tolerate that aliasing -- confirm in their definitions.
18 __device__ void g1p_addsub(g1p_t &p, g1p_t &q) {
19
20 #ifndef NDEBUG
 // Debug-only input validation.
21  if (!g1p_isPoint(p) || !(g1p_isPoint(q))) {
22  //printf("ERROR in g1p_addsub(): Invalid point(s)\n");
23  //g1p_print("p: ", p);
24  //g1p_print("q: ", q);
25
26  // return invalid points as result
27  fp_zero(p.x); fp_zero(q.x);
28  fp_zero(p.y); fp_zero(q.y);
29  fp_zero(p.z); fp_zero(q.z);
30
31  return;
32  }
33 #endif
34
35
 // X1..Z2 alias the coordinates of p and q; t0..t3 are local temporaries.
36  fp_t
37  &X1 = p.x, &Y1 = p.y, &Z1 = p.z,
38  &X2 = q.x, &Y2 = q.y, &Z2 = q.z,
39  t0, t1, t2, t3;
40
41  //fp_print("X1 = ", X1);
42  //fp_print("Y1 = ", Y1);
43  //fp_print("Z1 = ", Z1);
44
45  //fp_print("X2 = ", X2);
46  //fp_print("Y2 = ", Y2);
47  //fp_print("Z2 = ", Z2);
48  //printf("\n");
49
50  fp_mul(t0, X1, X2); // t0 = x1*x2
51  //fp_print("t0 = ", t0);
52
53  fp_add(t3, X1, Y1); // t3 = x1 + y1
54  //fp_print("t3 = ", t3);
55
56  fp_add(X1, X1, Z1); // td = x1 + z1 (X1 no longer holds x1 from here on)
57  //fp_print("td = ", X1);
58
59  fp_add(t2, X2, Z2); // te = x2 + z2
60  //fp_print("te = ", t2);
61
62  fp_mul(X1, X1, t2); // tf = td*te = (x1 + z1)*(x2 + z2)
63  //fp_print("tf = ", X1);
64
65
66  fp_add(t1, Y2, Z2); // t9 = y2 + z2
67  //fp_print("t9 = ", t1);
68
69  fp_mul(t2, Z1, Z2); // t2 = z1*z2
70  //fp_print("t2 = ", t2);
71
72  fp_add(Z1, Z1, Y1); // t8 = z1 + y1
73  //fp_print("t8 = ", Z1);
74
75  fp_sub(Z2, Z2, Y2); // T9 = z2 - y2
76  //fp_print("T9 = ", Z2);
77  fp_mul(Z2, Z2, Z1); // Ta = T9*t8 = (z2 - y2)*(z1 + y1)
78  //fp_print("Ta = ", Z2);
79  fp_mul(Z1, Z1, t1); // ta = t8*t9 = (z1 + y1)*(y2 + z2)
80
81  //fp_print("ta = ", Z1);
82  fp_sub(Z1, Z1, t2); // (tc) partial: ta - z1*z2; finished further down
83
84  fp_add(t1, X2, Y2); // t4 = x2 + y2
85  //fp_print("t4 = ", t1);
86
87  fp_mul(t1, t1, t3); // t5 = t4*t3 = (x2 + y2)*(x1 + y1)
88  //fp_print("t5 = ", t1);
89
90  fp_sub(X1, X1, t2); // (th) partial: tf - z1*z2; finished further down
91  fp_sub(X2, X2, Y2); // T4 = x2 - y2
92  //fp_print("T4 = ", X2);
93  fp_mul(X2, X2, t3); // T5 = T4*t3 = (x2 - y2)*(x1 + y1)
94  //fp_print("T5 = ", X2);
95  fp_sub(X2, X2, t0); // (T7) partial: T5 - x1*x2; finished further down
96
97  fp_mul(Y1, Y1, Y2); // t1 = y1*y2 (the tag t1 is stored in Y1, not in the variable t1)
98  //fp_print("t1 = ", Y1);
99
100  fp_x12(Y2, t2); // tk = 12*z1*z2
101  //fp_print("tk = ", Y2);
102
103  fp_add(Z2, Z2, Y1); // (Tc) partial: Ta + y1*y2
104  fp_sub(Z2, Z2, t2); // Tc = y1*z2 - y2*z1
105  //fp_print("Tc = ", Z2);
106
107  fp_sub(t2, t1, t0); // (t7) partial: t5 - x1*x2 (t2 no longer holds z1*z2)
108  fp_sub(t2, t2, Y1); // t7 = x1*y2 + x2*y1
109  //fp_print("t7 = ", t2);
110
111  fp_add(X2, X2, Y1); // T7 = x2*y1 - x1*y2
112  //fp_print("T7 = ", X2);
113
114  fp_sub(Z1, Z1, Y1); // tc = y1*z2 + y2*z1
115  //fp_print("tc = ", Z1);
116
117  fp_sub(X1, X1, t0); // th = x1*z2 + x2*z1
118  //fp_print("th = ", X1);
119
120  fp_x3(t0, t0); // ti = 3*x1*x2
121  //fp_print("ti = ", t0);
122
123  fp_x12(X1, X1); // tn = 12*th
124  //fp_print("tn = ", X1);
125
126
127  fp_add(t3, Y2, Y1); // tl = tk + y1*y2
128  //fp_print("tl = ", t3);
129
130  fp_sub(Y1, Y1, Y2); // tm = y1*y2 - tk
131  //fp_print("tm = ", Y1);
132
133
134  // Active (tag/var) = t7/t2, tc/Z1, ti/t0, tl/t3, tm/Y1, tn/X1, T7/X2, Tc/Z2
135  // Available (var) = t1, Y2
136
 // Final combination: each fp_mma(z, v, w, x, y) computes z = v*w + x*y.
137  fp_cpy(t1, X2); // T7 saved in t1, because X2 is overwritten by the next call
138
139  fp_mma(X2, t1, t3, Z2, X1); // T7, -Tm=tl, Tc, tn  ->  X2 = T7*tl + Tc*tn
140  fp_neg(X2, X2); // X2 = -(T7*tl + Tc*tn): x coordinate written to q
141
142  fp_neg(Z2, Z2); // -Tc
143
144  fp_mma(Z2, t1, t0, Z2, Y1); // T7, ti, -Tc, tm  ->  Z2 = T7*ti - Tc*tm: z coordinate written to q
145
146  fp_mma(Y2, t0, X1, t3, Y1); // ti, tn, tl, tm  ->  Y2 = ti*tn + tl*tm: y coordinate written to q
147
148  fp_neg(X1, X1); // -tn
149  fp_mma(X1, t2, Y1, Z1, X1); // t7, tm, tc, -tn  ->  X1 = t7*tm - tc*tn: x coordinate written to p
150  fp_mma(Z1, t2, t0, Z1, t3); // t7, ti, tc, tl  ->  Z1 = t7*ti + tc*tl: z coordinate written to p
151
152  fp_cpy(Y1, Y2); // p receives the same y value that was just stored in q
153
154  //fp_print("X3 = ", X1);
155  //fp_print("Y3 = ", Y1);
156  //fp_print("Z3 = ", Z1);
157
158  //fp_print("X4 = ", X2);
159  //fp_print("Y4 = ", Y2);
160  //fp_print("Z4 = ", Z2);
161 }
162 
163 // vim: ts=4 et sw=4 si
__device__ __host__ void fp_zero(fp_t &z)
Sets z to zero.
Definition: fp.cu:15
__device__ void fp_neg(fp_t &z, const fp_t &x)
Compute an additive inverse of a residue x modulo p. Stores in z. Subtracts x from the highest multip...
Definition: fp_neg.cu:16
__device__ void fp_mma(fp_t &z, const fp_t &v, const fp_t &w, const fp_t &x, const fp_t &y)
Fp multiply-multiply-add. Fast execution of z = (v*w + x*y) mod p. The double-wide products are added ...
Definition: fp_mma.cu:20
__device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y)
Computes the sum of two residues x and y modulo p and stores it in z. Device only function.
Definition: fp_add.cu:17
__device__ void fp_x12(fp_t &z, const fp_t &x)
Multiplies the residue mod p x by 12 and stores the result into z.
Definition: fp_x12.cu:15
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
Definition: fp.cuh:14
__device__ void fp_mul(fp_t &z, const fp_t &x, const fp_t &y)
Multiplies two Fp residues x and y, stores in z.
Definition: fp_mul.cu:17
__device__ __host__ void fp_cpy(fp_t &z, const fp_t &x)
Copy from x into z.
Definition: fp_cpy.cu:14
__device__ void fp_x3(fp_t &z, const fp_t &x)
Multiplies x by 3 and stores the result into z.
Definition: fp_x3.cu:15
__device__ void fp_sub(fp_t &z, const fp_t &x, const fp_t &y)
Calculates the difference of two residues modulo p and stores it into z.
Definition: fp_sub.cu:16
__device__ bool g1p_isPoint(const g1p_t &p)
Check if the value stored in p is a valid point on the G1 curve.
Definition: g1p_ispoint.cu:34
__device__ void g1p_addsub(g1p_t &p, g1p_t &q)
Stores the sum and difference of p and q into p and q. Projective p and q, p,q ← p+q, p−q.
Definition: g1p_addsub.cu:18
G1 point in projective coordinates.
Definition: g1.cuh:27
fp_t z
Definition: g1.cuh:28
fp_t x
Definition: g1.cuh:28
fp_t y
Definition: g1.cuh:28