FK20-CUDAdocs/g1p__addsub_8cu_source.html

 // bls12_381: Arithmetic for BLS12-381

 // Copyright 2022-2023 Dag Arne Osvik

 // Copyright 2022-2023 Luan Cardoso dos Santos


 #include <stdio.h>


 #include "fp.cuh"

 #include "g1.cuh"


 // g1p_addsub: computes the sum and the difference of two projective G1 points,
 // in place. On return, p holds p+q and q holds p-q.
 //
 // Parameters:
 //   p  in: first operand;  out: p+q
 //   q  in: second operand; out: p-q
 //
 // Notation used in the comments below: x1,y1,z1 and x2,y2,z2 are the ORIGINAL
 // coordinates of p and q on entry. The variables X1..Z2 are references to the
 // coordinates of p and q, so they are overwritten and reused as scratch space
 // as the computation proceeds. The short tags (t0, td, T9, ...) name
 // intermediate values, not variables; lower-case tags belong to the sum,
 // upper-case tags to the difference. All equalities are modulo p.
 //
 // Expanding the operations gives:
 //   t7 = x1*y2 + x2*y1        T7 = x2*y1 - x1*y2
 //   tc = y1*z2 + y2*z1        Tc = y1*z2 - y2*z1
 //   th = x1*z2 + x2*z1        tn = 12*th        ti = 3*x1*x2
 //   tl = y1*y2 + 12*z1*z2     tm = y1*y2 - 12*z1*z2
 //
 //   p.x = t7*tm - tc*tn       q.x = -(T7*tl + Tc*tn)
 //   p.y = ti*tn + tl*tm       q.y = p.y
 //   p.z = t7*ti + tc*tl       q.z = T7*ti - Tc*tm
 //
 // NOTE(review): these match the complete projective addition formulas for a
 // curve y^2 = x^3 + b with 3b = 12, applied to (p, q) and (p, -q) -- confirm
 // against the project's plain point addition.
 //
 // NOTE(review): p and q must be distinct objects. Because X1..Z2 alias the
 // arguments, calling this with p and q bound to the same object would read
 // already-overwritten coordinates.
 //
 // In builds without NDEBUG, if either input fails g1p_isPoint() all six
 // coordinates are set to zero and the function returns early.
 __device__ void g1p_addsub(g1p_t &p, g1p_t &q) {


 #ifndef NDEBUG

     // Debug-only input validation; compiled out when NDEBUG is defined.
     if (!g1p_isPoint(p) || !(g1p_isPoint(q))) {

         //printf("ERROR in g1p_addsub(): Invalid point(s)\n");

         //g1p_print("p: ", p);

         //g1p_print("q: ", q);


         // return invalid points as result

         fp_zero(p.x); fp_zero(q.x);

         fp_zero(p.y); fp_zero(q.y);

         fp_zero(p.z); fp_zero(q.z);


         return;

     }

 #endif


     // X1..Z2 are references (aliases) to the coordinates of p and q, so every
     // write below modifies the arguments directly. t0..t3 are local temporaries.
     fp_t

         &X1 = p.x, &Y1 = p.y, &Z1 = p.z,

         &X2 = q.x, &Y2 = q.y, &Z2 = q.z,

         t0, t1, t2, t3;


     //fp_print("X1 = ",  X1);

     //fp_print("Y1 = ",  Y1);

     //fp_print("Z1 = ",  Z1);


     //fp_print("X2 = ",  X2);

     //fp_print("Y2 = ",  Y2);

     //fp_print("Z2 = ",  Z2);

     //printf("\n");


     fp_mul(t0, X1, X2); // t0 = x1*x2

     //fp_print("t0 = ",  t0);


     fp_add(t3, X1, Y1); // t3 = x1 + y1

     //fp_print("t3 = ",  t3);


     fp_add(X1, X1, Z1); // td = x1 + z1 (X1 no longer holds x1 from here on)

     //fp_print("td = ",  X1);


     fp_add(t2, X2, Z2); // te = x2 + z2

     //fp_print("te = ",  t2);


     fp_mul(X1, X1, t2); // tf = td*te

     //fp_print("tf = ",  X1);


     fp_add(t1, Y2, Z2); // t9 = y2 + z2

     //fp_print("t9 = ",  t1);


     fp_mul(t2, Z1, Z2); // t2 = z1*z2

     //fp_print("t2 = ",  t2);


     fp_add(Z1, Z1, Y1); // t8 = y1 + z1

     //fp_print("t8 = ",  Z1);


     fp_sub(Z2, Z2, Y2); // T9 = z2 - y2

     //fp_print("T9 = ",  Z2);

     fp_mul(Z2, Z2, Z1); // Ta = T9*t8

     //fp_print("Ta = ",  Z2);

     fp_mul(Z1, Z1, t1); // ta = t8*t9


     //fp_print("ta = ",  Z1);

     fp_sub(Z1, Z1, t2); // (tc) = ta - z1*z2; y1*y2 is subtracted further down


     fp_add(t1, X2, Y2); // t4 = x2 + y2

     //fp_print("t4 = ",  t1);


     fp_mul(t1, t1, t3); // t5 = t4*t3

     //fp_print("t5 = ",  t1);


     fp_sub(X1, X1, t2); // (th) = tf - z1*z2; x1*x2 is subtracted further down

     fp_sub(X2, X2, Y2); // T4 = x2 - y2

     //fp_print("T4 = ",  X2);

     fp_mul(X2, X2, t3); // T5 = T4*t3

     //fp_print("T5 = ",  X2);

     fp_sub(X2, X2, t0); // (T7) = T5 - x1*x2; y1*y2 is added further down


     fp_mul(Y1, Y1, Y2); // t1 = y1*y2 (tag t1; the value lives in Y1, not in variable t1)

     //fp_print("t1 = ",  Y1);


     fp_x12(Y2, t2);     // tk = 12*z1*z2 (Y2 no longer holds y2 from here on)

     //fp_print("tk = ",  Y2);


     fp_add(Z2, Z2, Y1); // (Tc) = Ta + y1*y2

     fp_sub(Z2, Z2, t2); // Tc = Ta + y1*y2 - z1*z2 = y1*z2 - y2*z1

     //fp_print("Tc = ",  Z2);


     fp_sub(t2, t1, t0); // (t7) = t5 - x1*x2

     fp_sub(t2, t2, Y1); // t7 = t5 - x1*x2 - y1*y2 = x1*y2 + x2*y1

     //fp_print("t7 = ",  t2);


     fp_add(X2, X2, Y1); // T7 = T5 - x1*x2 + y1*y2 = x2*y1 - x1*y2

     //fp_print("T7 = ",  X2);


     fp_sub(Z1, Z1, Y1); // tc = ta - z1*z2 - y1*y2 = y1*z2 + y2*z1

     //fp_print("tc = ",  Z1);


     fp_sub(X1, X1, t0); // th = tf - z1*z2 - x1*x2 = x1*z2 + x2*z1

     //fp_print("th = ",  X1);


     fp_x3(t0, t0);      // ti = 3*x1*x2

     //fp_print("ti = ",  t0);


     fp_x12(X1, X1);     // tn = 12*th

     //fp_print("tn = ",  X1);


     fp_add(t3, Y2, Y1); // tl = 12*z1*z2 + y1*y2

     //fp_print("tl = ",  t3);


     fp_sub(Y1, Y1, Y2); // tm = y1*y2 - 12*z1*z2

     //fp_print("tm = ",  Y1);


     // Active (tag/var) = t7/t2, tc/Z1, ti/t0, tl/t3, tm/Y1, tn/X1, T7/X2, Tc/Z2

     // Available (var) = t1, Y2

     // Final stage: each output coordinate is a sum of two products, computed
     // with fp_mma (z = v*w + x*y). Several calls below pass the output variable
     // as one of the inputs as well.
     // NOTE(review): this relies on fp_mma tolerating an output that aliases an
     // input -- confirm in fp_mma.cu.


     fp_cpy(t1, X2); // T7 (saved in t1: X2 is overwritten next, T7 is needed again for Z2)


     fp_mma(X2, t1, t3, Z2, X1); // T7, -Tm=tl, Tc, tn  ->  T7*tl + Tc*tn

     fp_neg(X2, X2); // X2 = -(T7*tl + Tc*tn): x coordinate of p-q


     fp_neg(Z2, Z2); // -Tc


     fp_mma(Z2, t1, t0, Z2, Y1); // T7, ti, -Tc, tm  ->  Z2 = T7*ti - Tc*tm: z coordinate of p-q


     fp_mma(Y2, t0, X1, t3, Y1); // ti, tn, tl, tm  ->  Y2 = ti*tn + tl*tm: y coordinate (shared by both results)


     fp_neg(X1, X1); // -tn

     fp_mma(X1, t2, Y1, Z1, X1); // t7, tm, tc, -tn  ->  X1 = t7*tm - tc*tn: x coordinate of p+q

     fp_mma(Z1, t2, t0, Z1, t3); // t7, ti, tc, tl  ->  Z1 = t7*ti + tc*tl: z coordinate of p+q


     fp_cpy(Y1, Y2); // y coordinate of p+q is the same value as that of p-q


     //fp_print("X3 = ",  X1);

     //fp_print("Y3 = ",  Y1);

     //fp_print("Z3 = ",  Z1);


     //fp_print("X4 = ",  X2);

     //fp_print("Y4 = ",  Y2);

     //fp_print("Z4 = ",  Z2);

 }


 // vim: ts=4 et sw=4 si

fp_zero
__device__ __host__ void fp_zero(fp_t &z)
Sets z to zero.
Definition: fp.cu:15

fp.cuh

fp_neg
__device__ void fp_neg(fp_t &z, const fp_t &x)
Compute an additive inverse of a residue x modulo p. Stores in z. Subtracts x from the highest multip...
Definition: fp_neg.cu:16

fp_mma
__device__ void fp_mma(fp_t &z, const fp_t &v, const fp_t &w, const fp_t &x, const fp_t &y)
Fp multiply-multiply-add. Fast execution of z = (v*w + x*y) mod p The double-wide products are added ...
Definition: fp_mma.cu:20

fp_add
__device__ void fp_add(fp_t &z, const fp_t &x, const fp_t &y)
Computes the sum of two residues x and y modulo p and stores it in z. Device only function.
Definition: fp_add.cu:17

fp_x12
__device__ void fp_x12(fp_t &z, const fp_t &x)
Multiplies the residue x (mod p) by 12 and stores the result in z.
Definition: fp_x12.cu:15

fp_t
uint64_t fp_t[6]
Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element li...
Definition: fp.cuh:14

fp_mul
__device__ void fp_mul(fp_t &z, const fp_t &x, const fp_t &y)
Multiplies two Fp residues x and y, stores in z.
Definition: fp_mul.cu:17

fp_cpy
__device__ __host__ void fp_cpy(fp_t &z, const fp_t &x)
Copy from x into z.
Definition: fp_cpy.cu:14

fp_x3
__device__ void fp_x3(fp_t &z, const fp_t &x)
Multiplies x by 3 and stores the result into z.
Definition: fp_x3.cu:15

fp_sub
__device__ void fp_sub(fp_t &z, const fp_t &x, const fp_t &y)
Calculates the difference of two residues modulo p and stores it into z.
Definition: fp_sub.cu:16

g1.cuh

g1p_isPoint
__device__ bool g1p_isPoint(const g1p_t &p)
Check if the value stored in p is a valid point on the G1 curve.
Definition: g1p_ispoint.cu:34

g1p_addsub
__device__ void g1p_addsub(g1p_t &p, g1p_t &q)
Stores the sum and difference of p and q into p and q, respectively. For projective p and q: (p, q) ← (p+q, p−q).
Definition: g1p_addsub.cu:18

g1p_t
G1 point in projective coordinates.
Definition: g1.cuh:27

g1p_t::z
fp_t z
Definition: g1.cuh:28

g1p_t::x
fp_t x
Definition: g1.cuh:28

g1p_t::y
fp_t y
Definition: g1.cuh:28