FK20-CUDAdocs/g1p_8cu_source.html

 // bls12_381: Arithmetic for BLS12-381

 // Copyright 2022-2023 Dag Arne Osvik

 // Copyright 2022-2023 Luan Cardoso dos Santos


 #include <stdio.h>


 #include "fp.cuh"

 #include "g1.cuh"


 // Exports the three projective coordinates of p into caller-supplied buffers.
 //
 // p: point to export.
 // x, y, z: destination buffers, one per coordinate; each is filled by
 //          fp_toUint64(), which writes a uint64_t[6] in canonical form.
 __device__ void g1p_toUint64(const g1p_t &p, uint64_t *x, uint64_t *y, uint64_t *z)
 {
     fp_toUint64(x, p.x);    // X coordinate
     fp_toUint64(y, p.y);    // Y coordinate
     fp_toUint64(z, p.z);    // Z coordinate
 }


 // Builds the projective point p from three caller-supplied limb arrays.
 //
 // p: point to be written.
 // x, y, z: source buffers, one per coordinate; each is read by
 //          fp_fromUint64(), which takes a uint64_t[6] and stores its value
 //          mod p in the corresponding coordinate.
 // The coordinates are imported as given; nothing here checks them.
 __device__ __host__ void g1p_fromUint64(g1p_t &p, uint64_t *x, uint64_t *y, uint64_t *z)
 {
     fp_fromUint64(p.x, x);  // X coordinate
     fp_fromUint64(p.y, y);  // Y coordinate
     fp_fromUint64(p.z, z);  // Z coordinate
 }


 // Converts a point from affine coordinates to projective coordinates.
 //
 // p: destination, receives (a.x, a.y, 1), or the point at infinity.
 // a: source point in affine coordinates.
 //
 // The affine pair (0, 0) is used as the encoding of the point at infinity;
 // for that input p is set by g1p_inf() and nothing else is written.
 __device__ void g1p_fromG1a(g1p_t &p, const g1a_t &a) {

     if (fp_iszero(a.x) && fp_iszero(a.y)) {
         g1p_inf(p);
         // Must stop here: falling through would overwrite the point at
         // infinity with (0, 0, 1).
         return;
     }

     // Ordinary point: keep x and y, and set z to one.
     fp_fromUint64(p.x, a.x);
     fp_fromUint64(p.y, a.y);
     fp_one(p.z);
 }


 // Copies the projective point q into p, one coordinate at a time.
 //
 // p: destination point.
 // q: source point (left unchanged).
 __device__ __host__ void g1p_cpy(g1p_t &p, const g1p_t &q)
 {
     fp_cpy(p.x, q.x);   // X coordinate
     fp_cpy(p.y, q.y);   // Y coordinate
     fp_cpy(p.z, q.z);   // Z coordinate
 }


 // Prints the label s followed by the three coordinates of p on one line.
 //
 // s: user-supplied prefix, printed verbatim.
 // p: point to print.
 //
 // Each coordinate is written as "#x" followed by its six limbs in
 // hexadecimal, most significant limb (index 5) first, 16 digits per limb.
 // The x and y fields end with a space, the z field with a newline.
 __device__ __host__ void g1p_print(const char *s, const g1p_t &p)
 {
     const fp_t &cx = p.x;
     const fp_t &cy = p.y;
     const fp_t &cz = p.z;

     printf("%s", s);
     printf("#x%016lx%016lx%016lx%016lx%016lx%016lx ",  cx[5], cx[4], cx[3], cx[2], cx[1], cx[0]);
     printf("#x%016lx%016lx%016lx%016lx%016lx%016lx ",  cy[5], cy[4], cy[3], cy[2], cy[1], cy[0]);
     printf("#x%016lx%016lx%016lx%016lx%016lx%016lx\n", cz[5], cz[4], cz[3], cz[2], cz[1], cz[0]);
 }


 // Sets p to the point at infinity, stored as (x, y, z) = (0, 1, 0).
 //
 // p: point to overwrite; all six limbs of every coordinate are written.
 __device__ __host__ void g1p_inf(g1p_t &p)
 {
     for (int limb = 0; limb < 6; limb++) {
         p.x[limb] = 0;
         p.y[limb] = (limb == 0) ? 1 : 0;    // y = 1: only the lowest limb is non-zero
         p.z[limb] = 0;
     }
 }


 // Sets p to a fixed point: the x and y limbs below, with z = 1.
 // (Documented by the project as the G1 generator of BLS12-381.)
 //
 // p: point to overwrite.
 //
 // Limbs are listed from least significant (index 0) to most significant
 // (index 5).
 __device__ __host__ void g1p_gen(g1p_t &p)
 {
     // x coordinate
     p.x[0] = 0xFB3AF00ADB22C6BB;
     p.x[1] = 0x6C55E83FF97A1AEF;
     p.x[2] = 0xA14E3A3F171BAC58;
     p.x[3] = 0xC3688C4F9774B905;
     p.x[4] = 0x2695638C4FA9AC0F;
     p.x[5] = 0x17F1D3A73197D794;

     // y coordinate
     p.y[0] = 0x0CAA232946C5E7E1;
     p.y[1] = 0xD03CC744A2888AE4;
     p.y[2] = 0x00DB18CB2C04B3ED;
     p.y[3] = 0xFCF5E095D5D00AF6;
     p.y[4] = 0xA09E30ED741D8AE4;
     p.y[5] = 0x08B3F481E3AAA0F1;

     // z coordinate = 1
     p.z[0] = 1;
     for (int limb = 1; limb < 6; limb++)
         p.z[limb] = 0;
 }


 // Kernel wrappers for device-side functions


 // Kernel wrapper: element-wise comparison of two arrays of projective points.
 //
 // eq:    output array of count bytes; eq[i] is 1 when g1p_eq(p[i], q[i])
 //        is true and 0 otherwise.
 // count: number of elements in eq, p and q.
 // p, q:  input arrays of count points each.
 //
 // Any 1D/2D/3D grid and block shape may be used: the launch coordinates are
 // flattened into one linear thread id, and each thread handles elements
 // tid, tid + step, tid + 2*step, ... where step is the total number of
 // threads in the launch.
 __global__ void g1p_eq_wrapper(uint8_t *eq, size_t count, const g1p_t *p, const g1p_t *q) {

     // Linear thread id over the whole launch. Computed in size_t so that
     // neither the id nor the loop index below can wrap around at 2^32
     // while count is a size_t.
     size_t tid = blockIdx.z;
     tid = tid * gridDim.y  + blockIdx.y;
     tid = tid * gridDim.x  + blockIdx.x;
     tid = tid * blockDim.z + threadIdx.z;
     tid = tid * blockDim.y + threadIdx.y;
     tid = tid * blockDim.x + threadIdx.x;

     // Total number of threads in the launch.
     size_t step = (size_t)gridDim.z * gridDim.y * gridDim.x
                 * blockDim.z * blockDim.y * blockDim.x;

     for (size_t i = tid; i < count; i += step)
         eq[i] = g1p_eq(p[i], q[i]) ? 1 : 0;
 }


 // Kernel wrapper: converts an array of projective points to affine
 // coordinates by calling g1a_fromG1p() on each element.
 //
 // a:     output array of count affine points; a[i] receives the conversion
 //        of p[i].
 // count: number of elements in a and p.
 // p:     input array of count projective points.
 //
 // Any 1D/2D/3D grid and block shape may be used: the launch coordinates are
 // flattened into one linear thread id, and each thread handles elements
 // tid, tid + step, tid + 2*step, ... where step is the total number of
 // threads in the launch.
 __global__ void g1a_fromG1p_wrapper(g1a_t *a, size_t count, const g1p_t *p) {

     // Linear thread id over the whole launch. Computed in size_t so that
     // neither the id nor the loop index below can wrap around at 2^32
     // while count is a size_t.
     size_t tid = blockIdx.z;
     tid = tid * gridDim.y  + blockIdx.y;
     tid = tid * gridDim.x  + blockIdx.x;
     tid = tid * blockDim.z + threadIdx.z;
     tid = tid * blockDim.y + threadIdx.y;
     tid = tid * blockDim.x + threadIdx.x;

     // Total number of threads in the launch.
     size_t step = (size_t)gridDim.z * gridDim.y * gridDim.x
                 * blockDim.z * blockDim.y * blockDim.x;

     // Each thread converts its own elements; indexing by i keeps writes
     // from different threads on distinct output slots.
     for (size_t i = tid; i < count; i += step)
         g1a_fromG1p(a[i], p[i]);
 }


 // vim: ts=4 et sw=4 si

fp_toUint64
__device__ void fp_toUint64(uint64_t *z, const fp_t &x)
Converts from residue modulo p (fp_t) to uint64_t[6]. The converted value is in canonical form.
Definition: fp.cu:75

fp_one
__device__ __host__ void fp_one(fp_t &z)
Sets z to one.
Definition: fp.cu:26

fp_fromUint64
__device__ __host__ void fp_fromUint64(fp_t &z, const uint64_t *x)
Converts uint64_t[6] to fp_t. After this operation, z represents x mod p.
Definition: fp.cu:58

fp.cuh

fp_iszero
__device__ bool fp_iszero(const fp_t &x)
Checks if the residue x modulo p is congruent to zero.
Definition: fp_iszero.cu:13

fp_cpy
__device__ __host__ void fp_cpy(fp_t &z, const fp_t &x)
Copy from x into z.
Definition: fp_cpy.cu:14

g1.cuh

g1p_eq
__device__ bool g1p_eq(const g1p_t &p, const g1p_t &q)
Compares two projective points; returns true when equal. This function compares if both parameters rep...
Definition: g1p_compare.cu:23

g1a_fromG1p
__device__ void g1a_fromG1p(g1a_t &a, const g1p_t &p)
Converts a point in projective coordinates into affine coordinates.
Definition: g1a.cu:48

g1p_fromG1a
__device__ void g1p_fromG1a(g1p_t &p, const g1a_t &a)
Convert a point in affine coordinates to projective coordinates.
Definition: g1p.cu:51

g1p_inf
__device__ __host__ void g1p_inf(g1p_t &p)
Set p to the point-at-infinity (0,1,0)
Definition: g1p.cu:93

g1p_toUint64
__device__ void g1p_toUint64(const g1p_t &p, uint64_t *x, uint64_t *y, uint64_t *z)
Converts G1 point into arrays of uint64_t. Each array must be uint64_t[6]. This function does not vali...
Definition: g1p.cu:21

g1p_fromUint64
__device__ __host__ void g1p_fromUint64(g1p_t &p, uint64_t *x, uint64_t *y, uint64_t *z)
Converts arrays of uint64_t into a G1 point. Each array must be uint64_t[6]. This function does not va...
Definition: g1p.cu:38

g1p_eq_wrapper
__global__ void g1p_eq_wrapper(uint8_t *eq, size_t count, const g1p_t *p, const g1p_t *q)
Kernel wrapper, host-callable comparison of arrays of g1p_t.
Definition: g1p.cu:140

g1a_fromG1p_wrapper
__global__ void g1a_fromG1p_wrapper(g1a_t *a, size_t count, const g1p_t *p)
Kernel wrapper, host-callable conversion of points in projective coordinates into affine coordinates...
Definition: g1p.cu:166

g1p_gen
__device__ __host__ void g1p_gen(g1p_t &p)
Sets p to the generator point G1 of bls12_381.
Definition: g1p.cu:106

g1p_cpy
__device__ __host__ void g1p_cpy(g1p_t &p, const g1p_t &q)
Copy from q into p.
Definition: g1p.cu:67

g1p_print
__device__ __host__ void g1p_print(const char *s, const g1p_t &p)
Print a standard representation of p, preceded by the user-set string s.
Definition: g1p.cu:80

g1a_t
G1 point in affine coordinates.
Definition: g1.cuh:20

g1a_t::y
fp_t y
Definition: g1.cuh:21

g1a_t::x
fp_t x
Definition: g1.cuh:21

g1p_t
G1 point in projective coordinates.
Definition: g1.cuh:27

g1p_t::z
fp_t z
Definition: g1.cuh:28

g1p_t::x
fp_t x
Definition: g1.cuh:28

g1p_t::y
fp_t y
Definition: g1.cuh:28