From 5cbdabc2c352a724f6ffcc2be735ff63af5402cd Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 17 May 2021 14:01:43 +0200 Subject: [PATCH] Add basic support for s390x --- build.rs | 12 ++-- crypto/fipsmodule/aes/aes_nohw.c | 32 +++++------ crypto/fipsmodule/bn/montgomery.c | 14 +++++ crypto/fipsmodule/ec/p256_shared.h | 9 ++- crypto/internal.h | 88 ++++++++++++++++++++++++++++++ crypto/poly1305/poly1305.c | 7 +-- include/ring-core/base.h | 3 + 7 files changed, 136 insertions(+), 29 deletions(-) diff --git a/build.rs b/build.rs index b50128a37f..cc86af732c 100644 --- a/build.rs +++ b/build.rs @@ -40,12 +40,12 @@ const RING_SRCS: &[(&[&str], &str)] = &[ (&[], "crypto/mem.c"), (&[], "crypto/poly1305/poly1305.c"), - (&[AARCH64, ARM, X86_64, X86], "crypto/crypto.c"), - (&[AARCH64, ARM, X86_64, X86], "crypto/curve25519/curve25519.c"), - (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/ecp_nistz.c"), - (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/gfp_p256.c"), - (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/gfp_p384.c"), - (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/p256.c"), + (&[], "crypto/crypto.c"), + (&[], "crypto/curve25519/curve25519.c"), + (&[], "crypto/fipsmodule/ec/ecp_nistz.c"), + (&[], "crypto/fipsmodule/ec/gfp_p256.c"), + (&[], "crypto/fipsmodule/ec/gfp_p384.c"), + (&[], "crypto/fipsmodule/ec/p256.c"), (&[X86_64, X86], "crypto/cpu-intel.c"), diff --git a/crypto/fipsmodule/aes/aes_nohw.c b/crypto/fipsmodule/aes/aes_nohw.c index 308c7cca1c..503958957f 100644 --- a/crypto/fipsmodule/aes/aes_nohw.c +++ b/crypto/fipsmodule/aes/aes_nohw.c @@ -334,19 +334,18 @@ static inline uint8_t lo(uint32_t a) { static inline void aes_nohw_compact_block(aes_word_t out[AES_NOHW_BLOCK_WORDS], const uint8_t in[16]) { - OPENSSL_memcpy(out, in, 16); #if defined(OPENSSL_SSE2) - // No conversions needed. + OPENSSL_memcpy(out, in, 16); // No conversions needed. 
#elif defined(OPENSSL_64_BIT) - uint64_t a0 = aes_nohw_compact_word(out[0]); - uint64_t a1 = aes_nohw_compact_word(out[1]); + uint64_t a0 = aes_nohw_compact_word(CRYPTO_read_le64(in)); + uint64_t a1 = aes_nohw_compact_word(CRYPTO_read_le64(in + 8)); out[0] = (a0 & UINT64_C(0x00000000ffffffff)) | (a1 << 32); out[1] = (a1 & UINT64_C(0xffffffff00000000)) | (a0 >> 32); #else - uint32_t a0 = aes_nohw_compact_word(out[0]); - uint32_t a1 = aes_nohw_compact_word(out[1]); - uint32_t a2 = aes_nohw_compact_word(out[2]); - uint32_t a3 = aes_nohw_compact_word(out[3]); + uint32_t a0 = aes_nohw_compact_word(CRYPTO_read_le32(in)); + uint32_t a1 = aes_nohw_compact_word(CRYPTO_read_le32(in + 4)); + uint32_t a2 = aes_nohw_compact_word(CRYPTO_read_le32(in + 8)); + uint32_t a3 = aes_nohw_compact_word(CRYPTO_read_le32(in + 12)); // Note clang, when building for ARM Thumb2, will sometimes miscompile // expressions such as (a0 & 0x0000ff00) << 8, particularly when building // without optimizations. This bug was introduced in @@ -370,8 +369,8 @@ static inline void aes_nohw_uncompact_block( aes_nohw_uncompact_word((a0 & UINT64_C(0x00000000ffffffff)) | (a1 << 32)); uint64_t b1 = aes_nohw_uncompact_word((a1 & UINT64_C(0xffffffff00000000)) | (a0 >> 32)); - OPENSSL_memcpy(out, &b0, 8); - OPENSSL_memcpy(out + 8, &b1, 8); + CRYPTO_write_le64(b0, out); + CRYPTO_write_le64(b1, out + 8); #else uint32_t a0 = in[0]; uint32_t a1 = in[1]; @@ -392,10 +391,10 @@ static inline void aes_nohw_uncompact_block( b1 = aes_nohw_uncompact_word(b1); b2 = aes_nohw_uncompact_word(b2); b3 = aes_nohw_uncompact_word(b3); - OPENSSL_memcpy(out, &b0, 4); - OPENSSL_memcpy(out + 4, &b1, 4); - OPENSSL_memcpy(out + 8, &b2, 4); - OPENSSL_memcpy(out + 12, &b3, 4); + CRYPTO_write_le32(b0, out); + CRYPTO_write_le32(b1, out + 4); + CRYPTO_write_le32(b2, out + 8); + CRYPTO_write_le32(b3, out + 12); #endif } @@ -913,18 +912,17 @@ void aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, // Make |AES_NOHW_BATCH_SIZE| copies 
of |ivec|. alignas(AES_NOHW_WORD_SIZE) union { - uint32_t u32[AES_NOHW_BATCH_SIZE * 4]; uint8_t u8[AES_NOHW_BATCH_SIZE * 16]; } ivs, enc_ivs; for (size_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) { OPENSSL_memcpy(ivs.u8 + 16 * i, ivec, 16); } - uint32_t ctr = CRYPTO_bswap4(ivs.u32[3]); + uint32_t ctr = CRYPTO_read_be32(ivs.u8 + 12); for (;;) { // Update counters. for (uint32_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) { - ivs.u32[4 * i + 3] = CRYPTO_bswap4(ctr + i); + CRYPTO_write_be32(ctr + i, ivs.u8 + 16 * i + 12); } size_t todo = blocks >= AES_NOHW_BATCH_SIZE ? AES_NOHW_BATCH_SIZE : blocks; diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index b1f1c69329..e047bf5a6a 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -156,3 +156,17 @@ int bn_from_montgomery_in_place(BN_ULONG r[], size_t num_r, BN_ULONG a[], } return 1; } + +#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) && \ + !defined(OPENSSL_ARM) && !defined(OPENSSL_AARCH64) +void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { + Limb tmp[2 * num]; + for (size_t i = 0; i < num; i++) + tmp[i] = 0; + for (size_t i = 0; i < num; i++) + tmp[num + i] = limbs_mul_add_limb(tmp + i, ap, bp[i], num); + + bn_from_montgomery_in_place(rp, num, tmp, 2 * num, np, num, n0); +} +#endif diff --git a/crypto/fipsmodule/ec/p256_shared.h b/crypto/fipsmodule/ec/p256_shared.h index 4dd325bee1..3c1f107a65 100644 --- a/crypto/fipsmodule/ec/p256_shared.h +++ b/crypto/fipsmodule/ec/p256_shared.h @@ -50,7 +50,14 @@ typedef unsigned char P256_SCALAR_BYTES[33]; static inline void p256_scalar_bytes_from_limbs( P256_SCALAR_BYTES bytes_out, const BN_ULONG limbs[P256_LIMBS]) { - OPENSSL_memcpy(bytes_out, limbs, 32); + for (int i = 0; i < P256_LIMBS; i++) + { +#if BN_BITS2 == 64 + CRYPTO_write_le64(limbs[i], bytes_out + i * 8); +#else + CRYPTO_write_le32(limbs[i], bytes_out + i * 4); +#endif + } 
bytes_out[32] = 0; } diff --git a/crypto/internal.h b/crypto/internal.h index ebeb0c57c8..ee17a87fc6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -264,6 +264,10 @@ static inline crypto_word constant_time_select_w(crypto_word mask, static inline uint32_t CRYPTO_bswap4(uint32_t x) { return __builtin_bswap32(x); } + +static inline uint64_t CRYPTO_bswap8(uint64_t x) { + return __builtin_bswap64(x); +} #elif defined(_MSC_VER) #pragma warning(push, 3) #include <stdlib.h> @@ -272,6 +276,20 @@ static inline uint32_t CRYPTO_bswap4(uint32_t x) { static inline uint32_t CRYPTO_bswap4(uint32_t x) { return _byteswap_ulong(x); } + +static inline uint64_t CRYPTO_bswap8(uint64_t x) { + return _byteswap_uint64(x); +} +#else +static inline uint32_t CRYPTO_bswap4(uint32_t x) { + x = (x >> 16) | (x << 16); + x = ((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8); + return x; +} + +static inline uint64_t CRYPTO_bswap8(uint64_t x) { + return CRYPTO_bswap4(x >> 32) | (((uint64_t)CRYPTO_bswap4(x)) << 32); +} #endif #if !defined(RING_CORE_NOSTDLIBINC) @@ -309,4 +327,74 @@ static inline void *OPENSSL_memset(void *dst, int c, size_t n) { #endif } +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define RING_BIG_ENDIAN +#endif +#endif + +static inline uint32_t CRYPTO_read_le32(const uint8_t *p) { + uint32_t v; + OPENSSL_memcpy(&v, p, sizeof(v)); +#ifdef RING_BIG_ENDIAN + v = CRYPTO_bswap4(v); +#endif + return v; +} + +static inline uint32_t CRYPTO_read_be32(const uint8_t *p) { + uint32_t v; + OPENSSL_memcpy(&v, p, sizeof(v)); +#ifndef RING_BIG_ENDIAN + v = CRYPTO_bswap4(v); +#endif + return v; +} + +static inline uint64_t CRYPTO_read_le64(const uint8_t *p) { + uint64_t v; + OPENSSL_memcpy(&v, p, sizeof(v)); +#ifdef RING_BIG_ENDIAN + v = CRYPTO_bswap8(v); +#endif + return v; +} + +static inline uint64_t CRYPTO_read_be64(const uint8_t *p) { + uint64_t v; + OPENSSL_memcpy(&v, p, sizeof(v)); +#ifndef RING_BIG_ENDIAN + v = CRYPTO_bswap8(v); 
+#endif + return v; +} + +static inline void CRYPTO_write_le32(uint32_t v, uint8_t *p) { +#ifdef RING_BIG_ENDIAN + v = CRYPTO_bswap4(v); +#endif + OPENSSL_memcpy(p, &v, sizeof(v)); +} + +static inline void CRYPTO_write_be32(uint32_t v, uint8_t *p) { +#ifndef RING_BIG_ENDIAN + v = CRYPTO_bswap4(v); +#endif + OPENSSL_memcpy(p, &v, sizeof(v)); +} + +static inline void CRYPTO_write_le64(uint64_t v, uint8_t *p) { +#ifdef RING_BIG_ENDIAN + v = CRYPTO_bswap8(v); +#endif + OPENSSL_memcpy(p, &v, sizeof(v)); +} + +static inline void CRYPTO_write_be64(uint64_t v, uint8_t *p) { +#ifndef RING_BIG_ENDIAN + v = CRYPTO_bswap8(v); +#endif + OPENSSL_memcpy(p, &v, sizeof(v)); +} + #endif // OPENSSL_HEADER_CRYPTO_INTERNAL_H diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c index 6d864aa923..cf9d1c8dac 100644 --- a/crypto/poly1305/poly1305.c +++ b/crypto/poly1305/poly1305.c @@ -29,15 +29,12 @@ #pragma GCC diagnostic ignored "-Wconversion" #endif -// We can assume little-endian. static uint32_t U8TO32_LE(const uint8_t *m) { - uint32_t r; - OPENSSL_memcpy(&r, m, sizeof(r)); - return r; + return CRYPTO_read_le32(m); } static void U32TO8_LE(uint8_t *m, uint32_t v) { - OPENSSL_memcpy(m, &v, sizeof(v)); + CRYPTO_write_le32(v, m); } static uint64_t mul32x32_64(uint32_t a, uint32_t b) { return (uint64_t)a * b; } diff --git a/include/ring-core/base.h b/include/ring-core/base.h index f1a027d1a4..69cf40ca20 100644 --- a/include/ring-core/base.h +++ b/include/ring-core/base.h @@ -89,6 +89,9 @@ #elif defined(__MIPSEL__) && defined(__LP64__) #define OPENSSL_64_BIT #define OPENSSL_MIPS64 +#elif defined(__s390x__) +#define OPENSSL_64_BIT +#define OPENSSL_S390X #elif defined(__wasm__) #define OPENSSL_32_BIT #else