Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crypto/fipsmodule/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) OR
generic/bignum_mul.S
generic/bignum_optsub.S
generic/bignum_sqr.S

fastmul/bignum_kmul_16_32_neon.S
fastmul/bignum_kmul_32_64_neon.S
fastmul/bignum_ksqr_16_32_neon.S
fastmul/bignum_ksqr_32_64_neon.S
fastmul/bignum_emontredc_8n_neon.S
)
endif()
endif()
Expand Down
35 changes: 25 additions & 10 deletions crypto/fipsmodule/bn/montgomery.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,15 +477,29 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap,
uint64_t w = n0[0];

if (num == 32) {
if (ap == bp)
bignum_ksqr_32_64(mulres, ap, t);
else
bignum_kmul_32_64(mulres, ap, bp, t);
if (CRYPTO_is_NEON_capable()) {
if (ap == bp)
bignum_ksqr_32_64_neon(mulres, ap, t);
else
bignum_kmul_32_64_neon(mulres, ap, bp, t);
} else {
if (ap == bp)
bignum_ksqr_32_64(mulres, ap, t);
else
bignum_kmul_32_64(mulres, ap, bp, t);
}
} else if (num == 16) {
if (ap == bp)
bignum_ksqr_16_32(mulres, ap, t);
else
bignum_kmul_16_32(mulres, ap, bp, t);
if (CRYPTO_is_NEON_capable()) {
if (ap == bp)
bignum_ksqr_16_32_neon(mulres, ap, t);
else
bignum_kmul_16_32_neon(mulres, ap, bp, t);
} else {
if (ap == bp)
bignum_ksqr_16_32(mulres, ap, t);
else
bignum_kmul_16_32(mulres, ap, bp, t);
}
} else {
if (ap == bp)
bignum_sqr(num * 2, mulres, num, ap);
Expand All @@ -504,8 +518,9 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap,
// A. The result of step 1 >= 2^(64*num), meaning that bignum_emontredc_8n
// returned 1. Since m is less than 2^(64*num), (result of step 1) >= m holds.
// B. The result of step 1 is less than 2^(64*num), but it is still >= m.
uint64_t c;
c = bignum_emontredc_8n(num, mulres, np, w); // c: case A
uint64_t c = CRYPTO_is_NEON_capable() ?
bignum_emontredc_8n_neon(num, mulres, np, w) :
bignum_emontredc_8n(num, mulres, np, w); // c: case A
c |= bignum_ge(num, mulres + num, num, np); // c: case B
// Optionally subtract and store the result at rp
bignum_optsub(num, rp, mulres + num, c, np);
Expand Down
16 changes: 16 additions & 0 deletions third_party/s2n-bignum/include/s2n-bignum_aws-lc.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,19 @@ extern void curve25519_x25519base_byte_alt(uint8_t res[static 32], const uint8_t
extern void
bignum_ksqr_32_64(uint64_t z[static 64], const uint64_t x[static 32],
uint64_t t[static S2NBIGNUM_KSQR_32_64_TEMP_NWORDS]);
// Alternative entry point with the same parameter list as bignum_ksqr_32_64:
// input x[32], output z[64], temporary buffer t of at least
// S2NBIGNUM_KSQR_32_64_TEMP_NWORDS words.
// montgomery.c calls this in place of bignum_ksqr_32_64 when
// CRYPTO_is_NEON_capable() returns nonzero.
// NOTE(review): presumably a NEON implementation producing the same result
// as bignum_ksqr_32_64 -- confirm against fastmul/bignum_ksqr_32_64_neon.S.
extern void
bignum_ksqr_32_64_neon(uint64_t z[static 64], const uint64_t x[static 32],
uint64_t t[static S2NBIGNUM_KSQR_32_64_TEMP_NWORDS]);

// Evaluate z := x^2 where x is a 1024-bit integer.
// Input: x[16]; output: z[32]; temporary buffer: t[>=24]
#define S2NBIGNUM_KSQR_16_32_TEMP_NWORDS 24
extern void
bignum_ksqr_16_32(uint64_t z[static 32], const uint64_t x[static 16],
uint64_t t[static S2NBIGNUM_KSQR_16_32_TEMP_NWORDS]);
// Alternative entry point with the same parameter list as bignum_ksqr_16_32
// (x[16], z[32], temporary buffer of S2NBIGNUM_KSQR_16_32_TEMP_NWORDS words).
// montgomery.c calls this in place of bignum_ksqr_16_32 when
// CRYPTO_is_NEON_capable() returns nonzero.
// NOTE(review): presumably a NEON implementation producing the same result
// as bignum_ksqr_16_32 -- confirm against fastmul/bignum_ksqr_16_32_neon.S.
extern void
bignum_ksqr_16_32_neon(uint64_t z[static 32], const uint64_t x[static 16],
uint64_t t[static S2NBIGNUM_KSQR_16_32_TEMP_NWORDS]);

// Evaluate z := x * y where x and y are 2048-bit integers.
// Inputs: x[32], y[32]; output: z[64]; temporary buffer t[>=96]
Expand All @@ -152,6 +158,10 @@ extern void
bignum_kmul_32_64(uint64_t z[static 64], const uint64_t x[static 32],
const uint64_t y[static 32],
uint64_t t[static S2NBIGNUM_KMUL_32_64_TEMP_NWORDS]);
// Alternative entry point with the same parameter list as bignum_kmul_32_64:
// inputs x[32] and y[32], output z[64], temporary buffer t of at least
// S2NBIGNUM_KMUL_32_64_TEMP_NWORDS words.
// montgomery.c calls this in place of bignum_kmul_32_64 when
// CRYPTO_is_NEON_capable() returns nonzero.
// NOTE(review): presumably a NEON implementation producing the same result
// as bignum_kmul_32_64 -- confirm against fastmul/bignum_kmul_32_64_neon.S.
extern void
bignum_kmul_32_64_neon(uint64_t z[static 64], const uint64_t x[static 32],
const uint64_t y[static 32],
uint64_t t[static S2NBIGNUM_KMUL_32_64_TEMP_NWORDS]);

// Evaluate z := x * y where x and y are 1024-bit integers.
// Inputs: x[16], y[16]; output: z[32]; temporary buffer t[>=32]
Expand All @@ -160,6 +170,10 @@ extern void
bignum_kmul_16_32(uint64_t z[static 32], const uint64_t x[static 16],
const uint64_t y[static 16],
uint64_t t[static S2NBIGNUM_KMUL_16_32_TEMP_NWORDS]);
// Alternative entry point with the same parameter list as bignum_kmul_16_32:
// inputs x[16] and y[16], output z[32], temporary buffer t of at least
// S2NBIGNUM_KMUL_16_32_TEMP_NWORDS words.
// montgomery.c calls this in place of bignum_kmul_16_32 when
// CRYPTO_is_NEON_capable() returns nonzero.
// NOTE(review): presumably a NEON implementation producing the same result
// as bignum_kmul_16_32 -- confirm against fastmul/bignum_kmul_16_32_neon.S.
extern void
bignum_kmul_16_32_neon(uint64_t z[static 32], const uint64_t x[static 16],
const uint64_t y[static 16],
uint64_t t[static S2NBIGNUM_KMUL_16_32_TEMP_NWORDS]);

// Extended Montgomery reduce in 8-digit blocks.
// Assumes that z initially holds a 2k-digit bignum z_0, m is a k-digit odd
Expand All @@ -178,6 +192,8 @@ bignum_kmul_16_32(uint64_t z[static 32], const uint64_t x[static 16],
// Inputs: z[2*k], m[k], w; outputs: function return (extra result bit) and z[2*k]
extern uint64_t bignum_emontredc_8n(uint64_t k, uint64_t *z, const uint64_t *m,
uint64_t w);
// Alternative entry point with the same signature as bignum_emontredc_8n
// (k, z, m, w; returns a uint64_t).
// montgomery.c selects this over bignum_emontredc_8n when
// CRYPTO_is_NEON_capable() returns nonzero, and consumes the return value
// identically (as the carry flag that decides the final optional subtraction).
// NOTE(review): presumably a NEON implementation with identical results and
// the same preconditions on k, z and m -- confirm against
// fastmul/bignum_emontredc_8n_neon.S.
extern uint64_t bignum_emontredc_8n_neon(uint64_t k, uint64_t *z, const uint64_t *m,
uint64_t w);

// Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero)
// Inputs: x[k], p, y[k]; outputs: function return (carry-out) and z[k]
Expand Down