FK20 CUDA
fr.cuh File Reference
#include <stdint.h>
Include dependency graph for fr.cuh:

Go to the source code of this file.

Macros

#define FR_CUH
 

Typedefs

typedef uint64_t fr_t[4]
 Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t). fr_t[0] is the least significant element. More...
 

Functions

__device__ __host__ void fr_fromUint64 (fr_t &z, const uint64_t *x)
 Converts from uint64_t[4] to a residue modulo r, without reduction. More...
 
__device__ void fr_toUint64 (const fr_t &x, uint64_t *z)
 Converts fr_t to uint64_t[4]. More...
 
__device__ __host__ void fr_cpy (fr_t &z, const fr_t &x)
 Copy from x into z. More...
 
__device__ void fr_reduce4 (fr_t &z)
 Reduced the value in fr_t to the field modulus. More...
 
__device__ void fr_neg (fr_t &z)
 Compute an additive inverse of a residue x modulo r. Stores in x. Subtracts x from the highest multiple of R less than 2^256, then adds r in case of underflow. More...
 
__device__ void fr_x2 (fr_t &z)
 Multiply z by 2, and stores in z, with weak reduction. More...
 
__device__ void fr_x3 (fr_t &z)
 Multiply z by 3, and stores in z, with weak reduction. More...
 
__device__ void fr_x4 (fr_t &z)
 Multiply z by 4, and stores in z, with weak reduction. More...
 
__device__ void fr_x8 (fr_t &z)
 Multiply z by 8, and stores in z, with weak reduction. More...
 
__device__ void fr_x12 (fr_t &z)
 Multiply the residue mod r z by 12 with weak reduction. More...
 
__device__ void fr_add (fr_t &z, const fr_t &x)
 Computes the sum of two residues x and z modulo r and stores it in z. Device only function. More...
 
__device__ void fr_sub (fr_t &z, const fr_t &x)
 Calculates the difference of two residues modulo p and stores it into z. More...
 
__device__ void fr_addsub (fr_t &x, fr_t &y)
 Computes the sum and the difference of the arguments, storing back into the arguments: (x,y) := (x+y,x-y). Device function. More...
 
__device__ void fr_sqr (fr_t &z)
 Squares the value in z as a residue modulo r, and stores back into z. More...
 
__device__ void fr_mul (fr_t &z, const fr_t &x)
 Multiply two residues module r z and x, stores back into z. More...
 
__device__ void fr_inv (fr_t &z)
 Calculates the multiplicative inverse of z, and stores back. More...
 
__device__ __host__ void fr_zero (fr_t &z)
 Sets the value of z to zero. More...
 
__device__ __host__ void fr_one (fr_t &z)
 Sets the value of z to one. More...
 
__device__ bool fr_eq (const fr_t &x, const fr_t &y)
 Compares two residues modulo r. More...
 
__device__ bool fr_neq (const fr_t &x, const fr_t &y)
 Compares two fr_t residues. More...
 
__device__ bool fr_nonzero (const fr_t &x)
 Check if the reduced input x is different from zero. More...
 
__device__ bool fr_iszero (const fr_t &x)
 Checks if the residue x modulo f is congruent to zero. More...
 
__device__ bool fr_isone (const fr_t &x)
 Checks if the residue x modulo f is congruent to one. More...
 
__device__ void fr_print (const char *s, const fr_t &x)
 prints the canonical representation of x to STDOUT. More...
 
__device__ void fr_fft (fr_t *output, const fr_t *input)
 FFT over Fr. More...
 
__device__ void fr_ift (fr_t *output, const fr_t *input)
 Inverse FFT for fr_t[512]. More...
 
__global__ void fr_fft_wrapper (fr_t *output, const fr_t *input)
 wrapper for fr_fft: FFT for fr_t[512] More...
 
__global__ void fr_ift_wrapper (fr_t *output, const fr_t *input)
 wrapper for fr_ift: inverse FFT for fr_t[512] More...
 
__global__ void fr_eq_wrapper (uint8_t *eq, int count, const fr_t *x, const fr_t *y)
 Checks equality of two arrays of fr_t, element wise, and store in a byte array. More...
 

Variables

__constant__ fr_t fr_roots [515]
 Table for the precomputed root-of-unity values. More...
 

Macro Definition Documentation

◆ FR_CUH

#define FR_CUH

Definition at line 6 of file fr.cuh.

Typedef Documentation

◆ fr_t

typedef uint64_t fr_t[4]

Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t). fr_t[0] is the least significant element.

This type is used for the BLS12-381 subgroup. This group is derived from the case $ k \equiv 0 (mod 6) $ of Construction 6.6 in the taxonomy (eprint 2006/372), which results in a parameter x = -0xd201000000010000. The subgroup size is given by the equation $ x^4 -x^2 +1 $ and is numerically 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001.

Implementation-wise, the following constants are hardcoded and indicated, when used: r - Modulus rmu - Reciprocal of the modulus rmmu0 - Maximum integer multiple of the modulus such that rmmu0 < 2**256 rmmu1 - Minimum integer multiple of the modulus such that rmmu1 >= 2**256

Definition at line 24 of file fr.cuh.

Function Documentation

◆ fr_add()

__device__ void fr_add ( fr_t z,
const fr_t x 
)

Computes the sum of two residues x and z modulo r and stores it in z. Device only function.

Parameters
[in,out]z
[in]x
Returns
void

Definition at line 16 of file fr_add.cu.

Here is the caller graph for this function:

◆ fr_addsub()

__device__ void fr_addsub ( fr_t x,
fr_t y 
)

Computes the sum and the difference of the arguments, storing back into the arguments: (x,y) := (x+y,x-y). Device function.

(x,y) := (x+y,x-y)

Parameters
[in,out]xFirst operand, will store the sum after execution.
[in,out]ySecond operand, will store the difference after execution.
Returns
void

Definition at line 18 of file fr_addsub.cu.

Here is the caller graph for this function:

◆ fr_cpy()

__device__ __host__ void fr_cpy ( fr_t z,
const fr_t x 
)

Copy from x into z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 14 of file fr_cpy.cu.

Here is the caller graph for this function:

◆ fr_eq()

__device__ bool fr_eq ( const fr_t x,
const fr_t y 
)

Compares two residues modulo r.

Parameters
[in]x
[in]y
Returns
bool 1 if the values are equal, 0 otherwise

Definition at line 13 of file fr_eq.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_eq_wrapper()

__global__ void fr_eq_wrapper ( uint8_t *  eq,
int  count,
const fr_t x,
const fr_t y 
)

Checks equality of two arrays of fr_t, element wise, and store in a byte array.

Uses the CUDA device to perform a fast comparision between two arrays of fr_t. This function has no limitation on the number and size of blocks.

Parameters
[out]eqArray of count bytes, such that eq[i]==1 if x[i] == y[i], zero otherwise.
[in]countNumber of elements to be compared
[in]xFirst array
[in]ySecond array
Returns
void

Definition at line 99 of file fr.cu.

Here is the call graph for this function:

◆ fr_fft()

__device__ void fr_fft ( fr_t output,
const fr_t input 
)

FFT over Fr.

Performs one FFT-512 for each thread block. This function must be called with 256 threads per block, i.e. dim3(256,1,1). Input and output arrays can overlap without side effects. There is no interleaving of data for different FFTs (the stride is 1).

Parameters
[out]output
[in]input
Returns
void

Definition at line 26 of file fr_fft.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_fft_wrapper()

__global__ void fr_fft_wrapper ( fr_t output,
const fr_t input 
)

wrapper for fr_fft: FFT for fr_t[512]

Executes an FFT over many arrays fr_t[512]. One array per block. input and output can overlap without side effects. There is no interleaving of data for different FFTs.

Parameters
[out]output
[in]input
Returns
void

Definition at line 316 of file fr_fft.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_fromUint64()

__device__ __host__ void fr_fromUint64 ( fr_t z,
const uint64_t *  x 
)

Converts from uint64_t[4] to a residue modulo r, without reduction.

Parameters
[out]zResidue modulo r
[in]xArray of uint64_t
Returns
void

Definition at line 59 of file fr.cu.

◆ fr_ift()

__device__ void fr_ift ( fr_t output,
const fr_t input 
)

Inverse FFT for fr_t[512].

Performs one inverse FFT-512 in each thread block. This function must be called with 256 threads per block, i.e. dim3(256,1,1). Input and output arrays can overlap without side effects. There is no interleaving of data for different FFTs (the stride is 1).

Parameters
[out]output
[in]input
Returns
void

Definition at line 170 of file fr_fft.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_ift_wrapper()

__global__ void fr_ift_wrapper ( fr_t output,
const fr_t input 
)

wrapper for fr_ift: inverse FFT for fr_t[512]

Executes an inverse FFT over many arrays fr_t[512]. One array per block. input and output can overlap without side effects. There is no interleaving of data for different iFFTs.

Parameters
[out]output
[in]input
Returns
void

Definition at line 345 of file fr_fft.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_inv()

__device__ void fr_inv ( fr_t z)

Calculates the multiplicative inverse of z, and stores back.

Parameters
[in,out]z
Returns
void

This function calculates the multiplicative inverse of the argument. An integer a is the inverse of z if a*z mod r == 1

Normally, the inverse is found by using the Extended Euclidean Algorithm, to find integers (z,y) to satisfy the Bézout's identity: a*z + r*y == gcd(a, r) == 1 which can be rewritten as: az-1 == (-y)*m which follows that a*z mod r == 1. This approach has complexity in the order of O(log2(r)).

This implementation uses Euler's theorem, calculating the inverse as z^(phi(r)-1). where phi is Euler's totient function. For the special case where r is prime, phi(r) = r-1. Therefore, the inverse here is calculated as z^(r-2). Although this is asymptotically worse than EEA, this implementation avoid flow divergence and uses 258 squarings and 55 multiplications. Furthermore, since curve operations are done in projective coordinates, inversions are needed only at the very end when projective coordinates are translated into affine coordinates.

Definition at line 33 of file fr_inv.cu.

Here is the call graph for this function:

◆ fr_isone()

__device__ bool fr_isone ( const fr_t x)

Checks if the residue x modulo f is congruent to one.

Parameters
[in]x
Returns
bool 1 if true, 0 otherwise

Definition at line 13 of file fr_isone.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_iszero()

__device__ bool fr_iszero ( const fr_t x)

Checks if the residue x modulo f is congruent to zero.

Parameters
[in]x
Returns
bool 1 if the reduced value is zero, 0 otherwise.

Definition at line 13 of file fr_iszero.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_mul()

__device__ void fr_mul ( fr_t z,
const fr_t x 
)

Multiply two residues module r z and x, stores back into z.

Parameters
[in,out]zfirst multiplicand, stores the product
[in]xsecond multiplicand
Returns
void

Definition at line 13 of file fr_mul.cu.

Here is the caller graph for this function:

◆ fr_neg()

__device__ void fr_neg ( fr_t z)

Compute an additive inverse of a residue x modulo r. Stores in x. Subtracts x from the highest multiple of R less than 2^256, then adds r in case of underflow.

Parameters
z
Returns
void

Definition at line 15 of file fr_neg.cu.

◆ fr_neq()

__device__ bool fr_neq ( const fr_t x,
const fr_t y 
)

Compares two fr_t residues.

Parameters
[in]x
[in]y
Returns
bool 1 if the values are not equal, 0 otherwise

Definition at line 15 of file fr_neq.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_nonzero()

__device__ bool fr_nonzero ( const fr_t x)

Check if the reduced input x is different from zero.

Parameters
[in]x
Returns
bool 1 if x!=0, 0 otherwise

Definition at line 12 of file fr_nonzero.cu.

Here is the call graph for this function:

◆ fr_one()

__device__ __host__ void fr_one ( fr_t z)

Sets the value of z to one.

Parameters
[out]z
Returns
void

Definition at line 26 of file fr.cu.

Here is the caller graph for this function:

◆ fr_print()

__device__ void fr_print ( const char *  s,
const fr_t x 
)

prints the canonical representation of x to STDOUT.

Prints the canonical hexadecimal representation of x to stdout, followed by linefeed; prints with leading zeros, and without the hex prefix.

Parameters
[in]sDescription string
[in]x
Returns
void

Definition at line 41 of file fr.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fr_reduce4()

__device__ void fr_reduce4 ( fr_t z)

Reduced the value in fr_t to the field modulus.

Modulus = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 and is hardcoded in the function.

Parameters
z
Returns
void

Definition at line 16 of file fr_reduce4.cu.

Here is the caller graph for this function:

◆ fr_sqr()

__device__ void fr_sqr ( fr_t z)

Squares the value in z as a residue modulo r, and stores back into z.

Parameters
[in,out]z
Returns
void

Definition at line 14 of file fr_sqr.cu.

Here is the caller graph for this function:

◆ fr_sub()

__device__ void fr_sub ( fr_t z,
const fr_t x 
)

Calculates the difference of two residues modulo p and stores it into z.

z-=x

Parameters
[in,out]zminuend, difference
xsubtrahend
Returns
void

Definition at line 17 of file fr_sub.cu.

Here is the caller graph for this function:

◆ fr_toUint64()

__device__ void fr_toUint64 ( const fr_t x,
uint64_t *  z 
)

Converts fr_t to uint64_t[4].

Converts uint64_t[4] to fr_t. The 256-bit value in x is reduced to the canonical residue before being stored into z. The original value of x is unchanged.

Parameters
[out]zPointer to destination array.
[in]xfr_t to be converted.
Returns
void

Definition at line 76 of file fr.cu.

Here is the call graph for this function:

◆ fr_x12()

__device__ void fr_x12 ( fr_t z)

Multiply the residue mod r z by 12 with weak reduction.

Parameters
[in,out]z
Returns
void

Definition at line 13 of file fr_x12.cu.

Here is the caller graph for this function:

◆ fr_x2()

__device__ void fr_x2 ( fr_t z)

Multiply z by 2, and stores in z, with weak reduction.

Parameters
[in,out]z
Returns
void

Definition at line 13 of file fr_x2.cu.

Here is the caller graph for this function:

◆ fr_x3()

__device__ void fr_x3 ( fr_t z)

Multiply z by 3, and stores in z, with weak reduction.

Parameters
[in,out]z
Returns
void

Definition at line 12 of file fr_x3.cu.

Here is the caller graph for this function:

◆ fr_x4()

__device__ void fr_x4 ( fr_t z)

Multiply z by 4, and stores in z, with weak reduction.

Parameters
[in,out]z
Returns
void

Definition at line 13 of file fr_x4.cu.

Here is the caller graph for this function:

◆ fr_x8()

__device__ void fr_x8 ( fr_t z)

Multiply z by 8, and stores in z, with weak reduction.

Parameters
[in,out]z
Returns
void

Definition at line 13 of file fr_x8.cu.

Here is the caller graph for this function:

◆ fr_zero()

__device__ __host__ void fr_zero ( fr_t z)

Sets the value of z to zero.

Parameters
[out]z
Returns
void

Definition at line 15 of file fr.cu.

Here is the caller graph for this function:

Variable Documentation

◆ fr_roots

__constant__ fr_t fr_roots[515]
extern

Table for the precomputed root-of-unity values.

Table for the precomputed root-of-unity values.

Powers 0 to 512 of w = 5**((r-1)//512)r Extended with 2**-9 and 2**-9 / w on idx 513 and 514

These values are used to for improving Fourier transforms. Unrelated to that, they are also useful for mapping points of the G1 and G2 into Fq12. There, the roots of unity form a subgroup of Fq12, called Gt

Definition at line 17 of file fr_roots.cu.