FK20 CUDA
|
#include <stdint.h>
Go to the source code of this file.
Macros | |
#define | FR_CUH |
Typedefs | |
typedef uint64_t | fr_t[4] |
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t). fr_t[0] is the least significant element. More... | |
Functions | |
__device__ __host__ void | fr_fromUint64 (fr_t &z, const uint64_t *x) |
Converts from uint64_t[4] to a residue modulo r, without reduction. More... | |
__device__ void | fr_toUint64 (const fr_t &x, uint64_t *z) |
Converts fr_t to uint64_t[4]. More... | |
__device__ __host__ void | fr_cpy (fr_t &z, const fr_t &x) |
Copy from x into z. More... | |
__device__ void | fr_reduce4 (fr_t &z) |
Reduced the value in fr_t to the field modulus. More... | |
__device__ void | fr_neg (fr_t &z) |
Compute an additive inverse of a residue x modulo r. Stores in x. Subtracts x from the highest multiple of R less than 2^256, then adds r in case of underflow. More... | |
__device__ void | fr_x2 (fr_t &z) |
Multiply z by 2, and stores in z, with weak reduction. More... | |
__device__ void | fr_x3 (fr_t &z) |
Multiply z by 3, and stores in z, with weak reduction. More... | |
__device__ void | fr_x4 (fr_t &z) |
Multiply z by 4, and stores in z, with weak reduction. More... | |
__device__ void | fr_x8 (fr_t &z) |
Multiply z by 8, and stores in z, with weak reduction. More... | |
__device__ void | fr_x12 (fr_t &z) |
Multiply the residue mod r z by 12 with weak reduction. More... | |
__device__ void | fr_add (fr_t &z, const fr_t &x) |
Computes the sum of two residues x and z modulo r and stores it in z. Device only function. More... | |
__device__ void | fr_sub (fr_t &z, const fr_t &x) |
Calculates the difference of two residues modulo p and stores it into z. More... | |
__device__ void | fr_addsub (fr_t &x, fr_t &y) |
Computes the sum and the difference of the arguments, storing back into the arguments: (x,y) := (x+y,x-y). Device function. More... | |
__device__ void | fr_sqr (fr_t &z) |
Squares the value in z as a residue modulo r, and stores back into z. More... | |
__device__ void | fr_mul (fr_t &z, const fr_t &x) |
Multiply two residues module r z and x, stores back into z. More... | |
__device__ void | fr_inv (fr_t &z) |
Calculates the multiplicative inverse of z, and stores back. More... | |
__device__ __host__ void | fr_zero (fr_t &z) |
Sets the value of z to zero. More... | |
__device__ __host__ void | fr_one (fr_t &z) |
Sets the value of z to one. More... | |
__device__ bool | fr_eq (const fr_t &x, const fr_t &y) |
Compares two residues modulo r. More... | |
__device__ bool | fr_neq (const fr_t &x, const fr_t &y) |
Compares two fr_t residues. More... | |
__device__ bool | fr_nonzero (const fr_t &x) |
Check if the reduced input x is different from zero. More... | |
__device__ bool | fr_iszero (const fr_t &x) |
Checks if the residue x modulo f is congruent to zero. More... | |
__device__ bool | fr_isone (const fr_t &x) |
Checks if the residue x modulo f is congruent to one. More... | |
__device__ void | fr_print (const char *s, const fr_t &x) |
prints the canonical representation of x to STDOUT. More... | |
__device__ void | fr_fft (fr_t *output, const fr_t *input) |
FFT over Fr. More... | |
__device__ void | fr_ift (fr_t *output, const fr_t *input) |
Inverse FFT for fr_t[512]. More... | |
__global__ void | fr_fft_wrapper (fr_t *output, const fr_t *input) |
wrapper for fr_fft: FFT for fr_t[512] More... | |
__global__ void | fr_ift_wrapper (fr_t *output, const fr_t *input) |
wrapper for fr_ift: inverse FFT for fr_t[512] More... | |
__global__ void | fr_eq_wrapper (uint8_t *eq, int count, const fr_t *x, const fr_t *y) |
Checks equality of two arrays of fr_t, element wise, and store in a byte array. More... | |
Variables | |
__constant__ fr_t | fr_roots [515] |
Table for the precomputed root-of-unity values. More... | |
typedef uint64_t fr_t[4] |
Subgroup element stored as a 256-bit array (a 4-element little-endian array of uint64_t). fr_t[0] is the least significant element.
This type is used for the BLS12-381 subgroup. This group is derived from the case of Construction 6.6 in the taxonomy (eprint 2006/372), which results in a parameter x = -0xd201000000010000. The subgroup size is given by the equation
and is numerically 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001.
Implementation-wise, the following constants are hardcoded and indicated, when used: r - Modulus rmu - Reciprocal of the modulus rmmu0 - Maximum integer multiple of the modulus such that rmmu0 < 2**256 rmmu1 - Minimum integer multiple of the modulus such that rmmu1 >= 2**256
Computes the sum and the difference of the arguments, storing back into the arguments: (x,y) := (x+y,x-y). Device function.
(x,y) := (x+y,x-y)
[in,out] | x | First operand, will store the sum after execution. |
[in,out] | y | Second operand, will store the difference after execution. |
Definition at line 18 of file fr_addsub.cu.
Checks equality of two arrays of fr_t, element wise, and store in a byte array.
Uses the CUDA device to perform a fast comparision between two arrays of fr_t. This function has no limitation on the number and size of blocks.
[out] | eq | Array of count bytes, such that eq[i]==1 if x[i] == y[i], zero otherwise. |
[in] | count | Number of elements to be compared |
[in] | x | First array |
[in] | y | Second array |
Definition at line 99 of file fr.cu.
FFT over Fr.
Performs one FFT-512 for each thread block. This function must be called with 256 threads per block, i.e. dim3(256,1,1). Input and output arrays can overlap without side effects. There is no interleaving of data for different FFTs (the stride is 1).
[out] | output | |
[in] | input |
Definition at line 26 of file fr_fft.cu.
wrapper for fr_fft: FFT for fr_t[512]
Executes an FFT over many arrays fr_t[512]. One array per block. input and output can overlap without side effects. There is no interleaving of data for different FFTs.
[out] | output | |
[in] | input |
Definition at line 316 of file fr_fft.cu.
__device__ __host__ void fr_fromUint64 | ( | fr_t & | z, |
const uint64_t * | x | ||
) |
Inverse FFT for fr_t[512].
Performs one inverse FFT-512 in each thread block. This function must be called with 256 threads per block, i.e. dim3(256,1,1). Input and output arrays can overlap without side effects. There is no interleaving of data for different FFTs (the stride is 1).
[out] | output | |
[in] | input |
Definition at line 170 of file fr_fft.cu.
wrapper for fr_ift: inverse FFT for fr_t[512]
Executes an inverse FFT over many arrays fr_t[512]. One array per block. input and output can overlap without side effects. There is no interleaving of data for different iFFTs.
[out] | output | |
[in] | input |
Definition at line 345 of file fr_fft.cu.
__device__ void fr_inv | ( | fr_t & | z | ) |
Calculates the multiplicative inverse of z, and stores back.
[in,out] | z |
This function calculates the multiplicative inverse of the argument. An integer a is the inverse of z if a*z mod r == 1
Normally, the inverse is found by using the Extended Euclidean Algorithm, to find integers (z,y) to satisfy the Bézout's identity: a*z + r*y == gcd(a, r) == 1 which can be rewritten as: az-1 == (-y)*m which follows that a*z mod r == 1. This approach has complexity in the order of O(log2(r)).
This implementation uses Euler's theorem, calculating the inverse as z^(phi(r)-1). where phi is Euler's totient function. For the special case where r is prime, phi(r) = r-1. Therefore, the inverse here is calculated as z^(r-2). Although this is asymptotically worse than EEA, this implementation avoid flow divergence and uses 258 squarings and 55 multiplications. Furthermore, since curve operations are done in projective coordinates, inversions are needed only at the very end when projective coordinates are translated into affine coordinates.
Definition at line 33 of file fr_inv.cu.
__device__ bool fr_isone | ( | const fr_t & | x | ) |
Checks if the residue x modulo f is congruent to one.
[in] | x |
Definition at line 13 of file fr_isone.cu.
__device__ bool fr_iszero | ( | const fr_t & | x | ) |
Checks if the residue x modulo f is congruent to zero.
[in] | x |
Definition at line 13 of file fr_iszero.cu.
__device__ void fr_neg | ( | fr_t & | z | ) |
__device__ bool fr_nonzero | ( | const fr_t & | x | ) |
Check if the reduced input x is different from zero.
[in] | x |
Definition at line 12 of file fr_nonzero.cu.
__device__ __host__ void fr_one | ( | fr_t & | z | ) |
__device__ void fr_print | ( | const char * | s, |
const fr_t & | x | ||
) |
prints the canonical representation of x to STDOUT.
Prints the canonical hexadecimal representation of x to stdout, followed by linefeed; prints with leading zeros, and without the hex prefix.
[in] | s | Description string |
[in] | x |
Definition at line 41 of file fr.cu.
__device__ void fr_reduce4 | ( | fr_t & | z | ) |
Reduced the value in fr_t to the field modulus.
Modulus = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 and is hardcoded in the function.
z |
Definition at line 16 of file fr_reduce4.cu.
__device__ void fr_sqr | ( | fr_t & | z | ) |
__device__ void fr_toUint64 | ( | const fr_t & | x, |
uint64_t * | z | ||
) |
Converts fr_t to uint64_t[4].
Converts uint64_t[4] to fr_t. The 256-bit value in x is reduced to the canonical residue before being stored into z. The original value of x is unchanged.
[out] | z | Pointer to destination array. |
[in] | x | fr_t to be converted. |
Definition at line 76 of file fr.cu.
__device__ void fr_x12 | ( | fr_t & | z | ) |
__device__ void fr_x2 | ( | fr_t & | z | ) |
__device__ void fr_x3 | ( | fr_t & | z | ) |
__device__ void fr_x4 | ( | fr_t & | z | ) |
__device__ void fr_x8 | ( | fr_t & | z | ) |
__device__ __host__ void fr_zero | ( | fr_t & | z | ) |
|
extern |
Table for the precomputed root-of-unity values.
Table for the precomputed root-of-unity values.
Powers 0 to 512 of w = 5**((r-1)//512)r Extended with 2**-9 and 2**-9 / w on idx 513 and 514
These values are used to for improving Fourier transforms. Unrelated to that, they are also useful for mapping points of the G1 and G2 into Fq12. There, the roots of unity form a subgroup of Fq12, called Gt
Definition at line 17 of file fr_roots.cu.