FK20 CUDA
fp.cuh File Reference
#include <stdint.h>
Include dependency graph for fp.cuh:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define FP_CUH
 

Typedefs

typedef uint64_t fp_t[6]
 Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element little-endian array of uint64_t. More...
 

Functions

__device__ __host__ void fp_fromUint64 (fp_t &z, const uint64_t *x)
 Converts uint64_t[6] to fp_t. After this operation, z represents x mod p. More...
 
__device__ void fp_toUint64 (uint64_t *z, const fp_t &x)
 Converts from residue modulo p (fp_t) to uint64_t[6]. The converted value is in canonical form. More...
 
__device__ __host__ void fp_cpy (fp_t &z, const fp_t &x)
 Copy from x into z. More...
 
__device__ void fp_reduce6 (fp_t &z)
 Narrow reduction of a residue modulo p, reducing to the canonical representation. More...
 
__device__ void fp_neg (fp_t &z, const fp_t &x)
 Compute an additive inverse of a residue x modulo p. Stores in z. Subtracts x from the highest multiple of p less than 2^384, then adds p in case of underflow. More...
 
__device__ void fp_x2 (fp_t &z, const fp_t &x)
 Multiplies x by 2 and stores the result into z. More...
 
__device__ void fp_x3 (fp_t &z, const fp_t &x)
 Multiplies x by 3 and stores the result into z. More...
 
__device__ void fp_x4 (fp_t &z, const fp_t &x)
 Multiplies x by 4 and stores the result into z. More...
 
__device__ void fp_x8 (fp_t &z, const fp_t &x)
 Multiplies x by 8 and stores the result into z. More...
 
__device__ void fp_x12 (fp_t &z, const fp_t &x)
 Multiplies the residue mod p x by 12 and stores the result into z. More...
 
__device__ void fp_add (fp_t &z, const fp_t &x, const fp_t &y)
 Computes the sum of two residues x and y modulo p and stores it in z. Device only function. More...
 
__device__ void fp_sub (fp_t &z, const fp_t &x, const fp_t &y)
 Calculates the difference of two residues modulo p and stores it into z. More...
 
__device__ void fp_sqr (fp_t &z, const fp_t &x)
 Computes the square of the residue x modulo p and stores it in z. More...
 
__device__ void fp_mul (fp_t &z, const fp_t &x, const fp_t &y)
 Multiplies two Fp residues x and y, stores in z. More...
 
__device__ void fp_mma (fp_t &z, const fp_t &v, const fp_t &w, const fp_t &x, const fp_t &y)
 Fp multiply-multiply-add. Fast execution of z = (v*w + x*y) mod p The double-wide products are added before reduction, saving one reduction. More...
 
__device__ void fp_inv (fp_t &z, const fp_t &x)
 Calculates the multiplicative inverse of x and stores in z. More...
 
__device__ __host__ void fp_zero (fp_t &z)
 Sets z to zero. More...
 
__device__ __host__ void fp_one (fp_t &z)
 Sets z to one. More...
 
__device__ bool fp_eq (const fp_t &x, const fp_t &y)
 Compares two residues modulo p. More...
 
__device__ bool fp_neq (const fp_t &x, const fp_t &y)
 Compares two fp_t residues. More...
 
__device__ bool fp_nonzero (const fp_t &x)
 Check if the reduced input x is different from zero. More...
 
__device__ bool fp_iszero (const fp_t &x)
 Checks if the residue x modulo p is congruent to zero. More...
 
__device__ bool fp_isone (const fp_t &x)
 Checks if the residue x modulo p is congruent to one. More...
 
__device__ void fp_print (const char *s, const fp_t &x)
 Prints the canonical representation of x to STDOUT. More...
 

Macro Definition Documentation

◆ FP_CUH

#define FP_CUH

Definition at line 6 of file fp.cuh.

Typedef Documentation

◆ fp_t

typedef uint64_t fp_t[6]

Residue modulo p. Any 384-bit representative of each residue is allowed, and stored as a 6-element little-endian array of uint64_t.

Definition at line 14 of file fp.cuh.

Function Documentation

◆ fp_add()

__device__ void fp_add ( fp_t z,
const fp_t x,
const fp_t y 
)

Computes the sum of two residues x and y modulo p and stores it in z. Device only function.

Parameters
[out]z
[in]x
[in]y
Returns
void

Definition at line 17 of file fp_add.cu.

Here is the caller graph for this function:

◆ fp_cpy()

__device__ __host__ void fp_cpy ( fp_t z,
const fp_t x 
)

Copy from x into z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 14 of file fp_cpy.cu.

Here is the caller graph for this function:

◆ fp_eq()

__device__ bool fp_eq ( const fp_t x,
const fp_t y 
)

Compares two residues modulo p.

Parameters
[in]x
[in]y
Returns
bool 1 if the values are equal, 0 otherwise

Definition at line 14 of file fp_eq.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_fromUint64()

__device__ __host__ void fp_fromUint64 ( fp_t z,
const uint64_t *  x 
)

Converts uint64_t[6] to fp_t. After this operation, z represents x mod p.

Parameters
[out]zResidue modulo p
[in]xArray of uint64_t
Returns
void

Definition at line 58 of file fp.cu.

Here is the caller graph for this function:

◆ fp_inv()

__device__ void fp_inv ( fp_t z,
const fp_t x 
)

Calculates the multiplicative inverse of x and stores in z.

  • This function calculates the multiplicative inverse of the argument. An integer a is the inverse of z if a*z mod r == 1

Normally, the inverse is found by using the Extended Euclidean Algorithm, to find integers (z,y) to satisfy the Bézout's identity: a*z + r*y == gcd(a, r) == 1 which can be rewritten as: az-1 == (-y)*m which follows that a*z mod r == 1. This approach has complexity in the order of O(log2(r)).

This implementation uses Euler's theorem, calculating the inverse as z^(phi(r)-1). where phi is Euler's totient function. For the special case where r is prime, phi(r) = r-1. Therefore, the inverse here is calculated as z^(r-2). Although this is asymptotically worse than EEA, this implementation avoid flow divergence and uses 279 squarings and 128 multiplications. Furthermore, since curve operations are done in projective coordinates, inversions are needed only at the very end when projective coordinates are translated into affine coordinates.

Parameters
[out]z
[in]x
Returns
void

Definition at line 33 of file fp_inv.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_isone()

__device__ bool fp_isone ( const fp_t x)

Checks if the residue x modulo p is congruent to one.

Parameters
[in]x
Returns
bool 1 if true, 0 otherwise

Definition at line 13 of file fp_isone.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_iszero()

__device__ bool fp_iszero ( const fp_t x)

Checks if the residue x modulo p is congruent to zero.

Parameters
[in]x
Returns
bool 1 if true, 0 otherwise

Definition at line 13 of file fp_iszero.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_mma()

__device__ void fp_mma ( fp_t z,
const fp_t v,
const fp_t w,
const fp_t x,
const fp_t y 
)

Fp multiply-multiply-add. Fast execution of z = (v*w + x*y) mod p The double-wide products are added before reduction, saving one reduction.

Parameters
[out]z
[in]v
[in]w
[in]x
[in]y
Returns
void

Definition at line 20 of file fp_mma.cu.

Here is the caller graph for this function:

◆ fp_mul()

__device__ void fp_mul ( fp_t z,
const fp_t x,
const fp_t y 
)

Multiplies two Fp residues x and y, stores in z.

Parameters
[out]z
[in]x
[in]y
Returns
void

Definition at line 17 of file fp_mul.cu.

Here is the caller graph for this function:

◆ fp_neg()

__device__ void fp_neg ( fp_t z,
const fp_t x 
)

Compute an additive inverse of a residue x modulo p. Stores in z. Subtracts x from the highest multiple of p less than 2^384, then adds p in case of underflow.

Parameters
[out]z
[in]x
Returns
void

Definition at line 16 of file fp_neg.cu.

Here is the caller graph for this function:

◆ fp_neq()

__device__ bool fp_neq ( const fp_t x,
const fp_t y 
)

Compares two fp_t residues.

Parameters
[in]x
[in]y
Returns
bool 1 if the values are not equal, 0 otherwise

Definition at line 14 of file fp_neq.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_nonzero()

__device__ bool fp_nonzero ( const fp_t x)

Check if the reduced input x is different from zero.

Parameters
[in]x
Returns
bool 1 if true, zero otherwise.

Definition at line 12 of file fp_nonzero.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_one()

__device__ __host__ void fp_one ( fp_t z)

Sets z to one.

Parameters
[in,out]z
Returns
void

Definition at line 26 of file fp.cu.

Here is the caller graph for this function:

◆ fp_print()

__device__ void fp_print ( const char *  s,
const fp_t x 
)

Prints the canonical representation of x to STDOUT.

Parameters
[in]sDescription string
[in]xResidue modulo p
Returns
void

Definition at line 39 of file fp.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_reduce6()

__device__ void fp_reduce6 ( fp_t z)

Narrow reduction of a residue modulo p, reducing to the canonical representation.

Parameters
[in,out]zresidue modulo p
Returns
void

Definition at line 14 of file fp_reduce6.cu.

Here is the caller graph for this function:

◆ fp_sqr()

__device__ void fp_sqr ( fp_t z,
const fp_t x 
)

Computes the square of the residue x modulo p and stores it in z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 16 of file fp_sqr.cu.

Here is the caller graph for this function:

◆ fp_sub()

__device__ void fp_sub ( fp_t z,
const fp_t x,
const fp_t y 
)

Calculates the difference of two residues modulo p and stores it into z.

Parameters
[out]z
[in]xminuend
[in]ysubtrahend
Returns
void

Definition at line 16 of file fp_sub.cu.

Here is the caller graph for this function:

◆ fp_toUint64()

__device__ void fp_toUint64 ( uint64_t *  z,
const fp_t x 
)

Converts from residue modulo p (fp_t) to uint64_t[6]. The converted value is in canonical form.

Parameters
[out]z
[in]x
Returns
void

Definition at line 75 of file fp.cu.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fp_x12()

__device__ void fp_x12 ( fp_t z,
const fp_t x 
)

Multiplies the residue mod p x by 12 and stores the result into z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 15 of file fp_x12.cu.

Here is the caller graph for this function:

◆ fp_x2()

__device__ void fp_x2 ( fp_t z,
const fp_t x 
)

Multiplies x by 2 and stores the result into z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 15 of file fp_x2.cu.

Here is the caller graph for this function:

◆ fp_x3()

__device__ void fp_x3 ( fp_t z,
const fp_t x 
)

Multiplies x by 3 and stores the result into z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 15 of file fp_x3.cu.

Here is the caller graph for this function:

◆ fp_x4()

__device__ void fp_x4 ( fp_t z,
const fp_t x 
)

Multiplies x by 4 and stores the result into z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 15 of file fp_x4.cu.

Here is the caller graph for this function:

◆ fp_x8()

__device__ void fp_x8 ( fp_t z,
const fp_t x 
)

Multiplies x by 8 and stores the result into z.

Parameters
[out]z
[in]x
Returns
void

Definition at line 15 of file fp_x8.cu.

Here is the caller graph for this function:

◆ fp_zero()

__device__ __host__ void fp_zero ( fp_t z)

Sets z to zero.

Parameters
[out]z
Returns
void

Definition at line 15 of file fp.cu.

Here is the caller graph for this function: