From 78fc0bd1db4ba98d3210372a9ae0a1b95c16ca10 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 8 Aug 2023 10:36:15 +0100 Subject: [PATCH 01/11] Define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS on Windows-on-Arm Signed-off-by: Dave Rodgman --- library/alignment.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/library/alignment.h b/library/alignment.h index ab15986e5176..211e7ac37048 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -35,11 +35,16 @@ * efficient when this is not defined. */ #if defined(__ARM_FEATURE_UNALIGNED) \ - || defined(__i386__) || defined(__amd64__) || defined(__x86_64__) + || defined(__i386__) || defined(__amd64__) || defined(__x86_64__) \ + || defined(_M_ARM64) || defined(_M_ARM64EC) /* * __ARM_FEATURE_UNALIGNED is defined where appropriate by armcc, gcc 7, clang 9 * (and later versions) for Arm v7 and later; all x86 platforms should have * efficient unaligned access. + * + * https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#alignment + * specifies that on Windows-on-Arm64, unaligned access is safe (except for uncached + * device memory). */ #define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS #endif From ad71b6a834b136249a2aee549f4b85644b424377 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 8 Aug 2023 10:37:33 +0100 Subject: [PATCH 02/11] Support ARM64EC in the same way as ARM64 in sha256 and sha512 Signed-off-by: Dave Rodgman --- library/sha256.c | 4 ++-- library/sha512.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/library/sha256.c b/library/sha256.c index 223badf00f07..20d5188b6bab 100644 --- a/library/sha256.c +++ b/library/sha256.c @@ -110,7 +110,7 @@ # include # endif # endif -#elif defined(_M_ARM64) +#elif defined(_M_ARM64) || defined(_M_ARM64EC) # if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \ defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY) # include @@ -135,7 +135,7 @@ static int mbedtls_a64_crypto_sha256_determine_support(void) { return 1; } -#elif defined(_M_ARM64) +#elif defined(_M_ARM64) || defined(_M_ARM64EC) #define WIN32_LEAN_AND_MEAN #include #include diff --git a/library/sha512.c b/library/sha512.c index e739af25465a..23e8745a529c 100644 --- a/library/sha512.c +++ b/library/sha512.c @@ -154,7 +154,7 @@ static int mbedtls_a64_crypto_sha512_determine_support(void) NULL, 0); return ret == 0 && value != 0; } -#elif defined(_M_ARM64) +#elif defined(_M_ARM64) || defined(_M_ARM64EC) /* * As of March 2022, there don't appear to be any PF_ARM_V8_* flags * available to pass to IsProcessorFeaturePresent() to check for From be0928666614cf52510845ad1d3c44607b075f1d Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 8 Aug 2023 10:42:55 +0100 Subject: [PATCH 03/11] Enable 8-byte fastpath in mbedtls_xor on ARM64 and ARM64EC Signed-off-by: Dave Rodgman --- library/common.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/library/common.h b/library/common.h index 3c472c685daf..f83821691061 100644 --- a/library/common.h +++ b/library/common.h @@ -188,7 +188,8 @@ inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned uint8x16_t x = veorq_u8(v1, v2); vst1q_u8(r + i, x); } -#elif defined(__amd64__) || defined(__x86_64__) || defined(__aarch64__) +#elif defined(__amd64__) || defined(__x86_64__) || \ + defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* This codepath probably only makes sense on architectures with 64-bit registers */ for (; (i + 8) <= n; i += 8) { uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i); @@ -227,7 +228,8 @@ static inline void mbedtls_xor_no_simd(unsigned char *r, { size_t i = 0; #if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) -#if defined(__amd64__) || defined(__x86_64__) || defined(__aarch64__) +#if defined(__amd64__) || defined(__x86_64__) || \ + defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* This codepath probably only makes sense on architectures with 64-bit registers */ for (; (i + 8) <= n; i += 8) { uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i); From 4ffd7c7614b8f6a9a083de0b9e8c5588ca987e4a Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 5 Sep 2023 11:43:02 +0100 Subject: [PATCH 04/11] Introduce MBEDTLS_HAVE_NEON_INTRINSICS and simplify NEON header inclusion Signed-off-by: Dave Rodgman --- library/common.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/library/common.h b/library/common.h index f83821691061..c080af043f61 100644 --- a/library/common.h +++ b/library/common.h @@ -33,8 +33,14 @@ #if defined(__ARM_NEON) #include +#define MBEDTLS_HAVE_NEON_INTRINSICS #endif /* __ARM_NEON */ +#if defined(_M_ARM64) || defined(_M_ARM64EC) +#include +#define MBEDTLS_HAVE_NEON_INTRINSICS +#endif + /** Helper to define a function as static except when building invasive tests. * * If a function is only used inside its own source file and should be From a0f10da9d22abea8879af814d739632ed9d30d0a Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 5 Sep 2023 11:43:17 +0100 Subject: [PATCH 05/11] Use MBEDTLS_HAVE_NEON_INTRINSICS instead of __ARM_NEON Signed-off-by: Dave Rodgman --- library/aesce.c | 6 ++---- library/common.h | 2 +- library/sha256.c | 13 +++---------- library/sha512.c | 6 ++---- 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/library/aesce.c b/library/aesce.c index 8b42b034f5e7..21ec47daa13f 100644 --- a/library/aesce.c +++ b/library/aesce.c @@ -26,7 +26,7 @@ * By defining the macros ourselves we gain access to those declarations without * requiring -march on the command line. * - * `arm_neon.h` could be included by any header file, so we put these defines + * `arm_neon.h` is included by common.h, so we put these defines * at the top of this file, before any includes. */ #define __ARM_FEATURE_CRYPTO 1 @@ -66,9 +66,7 @@ # endif #endif -#ifdef __ARM_NEON -#include -#else +#if !defined(MBEDTLS_HAVE_NEON_INTRINSICS) #error "Target does not support NEON instructions" #endif diff --git a/library/common.h b/library/common.h index c080af043f61..fd2aecb208c1 100644 --- a/library/common.h +++ b/library/common.h @@ -187,7 +187,7 @@ inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned { size_t i = 0; #if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) -#if defined(__ARM_NEON) +#if defined(MBEDTLS_HAVE_NEON_INTRINSICS) for (; (i + 16) <= n; i += 16) { uint8x16_t v1 = vld1q_u8(a + i); uint8x16_t v2 = vld1q_u8(b + i); diff --git a/library/sha256.c b/library/sha256.c index 20d5188b6bab..da6ec180cda8 100644 --- a/library/sha256.c +++ b/library/sha256.c @@ -31,7 +31,7 @@ * By defining the macros ourselves we gain access to those declarations without * requiring -march on the command line. * - * `arm_neon.h` could be included by any header file, so we put these defines + * `arm_neon.h` is included by common.h, so we put these defines * at the top of this file, before any includes. */ #define __ARM_FEATURE_CRYPTO 1 @@ -63,9 +63,7 @@ /* *INDENT-OFF* */ -# ifdef __ARM_NEON -# include -# else +# if !defined(MBEDTLS_HAVE_NEON_INTRINSICS) # error "Target does not support NEON instructions" # endif @@ -110,12 +108,7 @@ # include # endif # endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \ - defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY) -# include -# endif -#else +#elif !(defined(_M_ARM64) || defined(_M_ARM64EC)) # undef MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY # undef MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT #endif diff --git a/library/sha512.c b/library/sha512.c index 23e8745a529c..0e99914dda0d 100644 --- a/library/sha512.c +++ b/library/sha512.c @@ -31,7 +31,7 @@ * By defining the macros ourselves we gain access to those declarations without * requiring -march on the command line. * - * `arm_neon.h` could be included by any header file, so we put these defines + * `arm_neon.h` is included by common.h, so we put these defines * at the top of this file, before any includes. */ #define __ARM_FEATURE_SHA512 1 @@ -60,9 +60,7 @@ # if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \ defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) /* *INDENT-OFF* */ -# ifdef __ARM_NEON -# include -# else +# if !defined(MBEDTLS_HAVE_NEON_INTRINSICS) # error "Target does not support NEON instructions" # endif /* From c5cc727dd04d1a5df16487d30c81a6a2ff2f0e0e Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 15 Sep 2023 11:41:17 +0100 Subject: [PATCH 06/11] Use new MBEDTLS_ARCH_IS_xxx macros Signed-off-by: Dave Rodgman --- library/alignment.h | 2 +- library/common.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/library/alignment.h b/library/alignment.h index 211e7ac37048..ff811e34d523 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -35,7 +35,7 @@ * efficient when this is not defined. */ #if defined(__ARM_FEATURE_UNALIGNED) \ - || defined(__i386__) || defined(__amd64__) || defined(__x86_64__) \ + || defined(MBEDTLS_ARCH_IS_X86) || defined(MBEDTLS_ARCH_IS_X64) \ || defined(_M_ARM64) || defined(_M_ARM64EC) /* * __ARM_FEATURE_UNALIGNED is defined where appropriate by armcc, gcc 7, clang 9 diff --git a/library/common.h b/library/common.h index fd2aecb208c1..6c65084f46ea 100644 --- a/library/common.h +++ b/library/common.h @@ -194,8 +194,7 @@ inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned uint8x16_t x = veorq_u8(v1, v2); vst1q_u8(r + i, x); } -#elif defined(__amd64__) || defined(__x86_64__) || \ - defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#elif defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64) /* This codepath probably only makes sense on architectures with 64-bit registers */ for (; (i + 8) <= n; i += 8) { uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i); @@ -234,8 +233,7 @@ static inline void mbedtls_xor_no_simd(unsigned char *r, { size_t i = 0; #if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) -#if defined(__amd64__) || defined(__x86_64__) || \ - defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#if defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64) /* This codepath probably only makes sense on architectures with 64-bit registers */ for (; (i + 8) <= n; i += 8) { uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i); From 0a48717b83f3629b2f43ae925f18db0c7d0409d5 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 15 Sep 2023 11:52:06 +0100 Subject: [PATCH 07/11] Simplify Windows-on-Arm macros Signed-off-by: Dave Rodgman --- include/mbedtls/build_info.h | 4 ++++ library/alignment.h | 2 +- library/bignum.c | 3 ++- library/common.h | 4 +--- library/sha256.c | 4 ++-- library/sha512.c | 9 ++------- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/mbedtls/build_info.h b/include/mbedtls/build_info.h index 842f15c58fcd..cb2cda76d1f8 100644 --- a/include/mbedtls/build_info.h +++ b/include/mbedtls/build_info.h @@ -74,6 +74,10 @@ #define MBEDTLS_ARCH_IS_X86 #endif +#if defined(_M_ARM64) || defined(_M_ARM64EC) +#define MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64 +#endif + #if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE) #define _CRT_SECURE_NO_DEPRECATE 1 #endif diff --git a/library/alignment.h b/library/alignment.h index ff811e34d523..d8c4fb384bd5 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -36,7 +36,7 @@ */ #if defined(__ARM_FEATURE_UNALIGNED) \ || defined(MBEDTLS_ARCH_IS_X86) || defined(MBEDTLS_ARCH_IS_X64) \ - || defined(_M_ARM64) || defined(_M_ARM64EC) + || defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) /* * __ARM_FEATURE_UNALIGNED is defined where appropriate by armcc, gcc 7, clang 9 * (and later versions) for Arm v7 and later; all x86 platforms should have diff --git a/library/bignum.c b/library/bignum.c index 7c265e04da4e..795952ccd0fd 100644 --- a/library/bignum.c +++ b/library/bignum.c @@ -114,7 +114,8 @@ int mbedtls_mpi_lt_mpi_ct(const mbedtls_mpi *X, * about whether the assignment was made or not. * (Leaking information about the respective sizes of X and Y is ok however.) */ -#if defined(_MSC_VER) && defined(_M_ARM64) && (_MSC_FULL_VER < 193131103) +#if defined(_MSC_VER) && defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) && \ + (_MSC_FULL_VER < 193131103) /* * MSVC miscompiles this function if it's inlined prior to Visual Studio 2022 version 17.1. See: * https://developercommunity.visualstudio.com/t/c-compiler-miscompiles-part-of-mbedtls-library-on/1646989 diff --git a/library/common.h b/library/common.h index 6c65084f46ea..48fb6d0d73ae 100644 --- a/library/common.h +++ b/library/common.h @@ -34,9 +34,7 @@ #if defined(__ARM_NEON) #include #define MBEDTLS_HAVE_NEON_INTRINSICS -#endif /* __ARM_NEON */ - -#if defined(_M_ARM64) || defined(_M_ARM64EC) +#elif defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) #include #define MBEDTLS_HAVE_NEON_INTRINSICS #endif diff --git a/library/sha256.c b/library/sha256.c index da6ec180cda8..ed47c7c51053 100644 --- a/library/sha256.c +++ b/library/sha256.c @@ -108,7 +108,7 @@ # include # endif # endif -#elif !(defined(_M_ARM64) || defined(_M_ARM64EC)) +#elif !defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) # undef MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY # undef MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT #endif @@ -128,7 +128,7 @@ static int mbedtls_a64_crypto_sha256_determine_support(void) { return 1; } -#elif defined(_M_ARM64) || defined(_M_ARM64EC) +#elif defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) #define WIN32_LEAN_AND_MEAN #include #include diff --git a/library/sha512.c b/library/sha512.c index 0e99914dda0d..05b89408f8e4 100644 --- a/library/sha512.c +++ b/library/sha512.c @@ -119,12 +119,7 @@ # include # endif # endif -#elif defined(_M_ARM64) -# if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \ - defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) -# include -# endif -#else +#elif !defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) # undef MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY # undef MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT #endif @@ -152,7 +147,7 @@ static int mbedtls_a64_crypto_sha512_determine_support(void) NULL, 0); return ret == 0 && value != 0; } -#elif defined(_M_ARM64) || defined(_M_ARM64EC) +#elif defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) /* * As of March 2022, there don't appear to be any PF_ARM_V8_* flags * available to pass to IsProcessorFeaturePresent() to check for From 3e521849231f31df0af4f4d860b8f2417eb64d2a Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Mon, 18 Sep 2023 10:36:21 +0100 Subject: [PATCH 08/11] Make macro definition more consistent with similar defns Signed-off-by: Dave Rodgman --- include/mbedtls/build_info.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mbedtls/build_info.h b/include/mbedtls/build_info.h index cb2cda76d1f8..89564506459b 100644 --- a/include/mbedtls/build_info.h +++ b/include/mbedtls/build_info.h @@ -74,7 +74,8 @@ #define MBEDTLS_ARCH_IS_X86 #endif -#if defined(_M_ARM64) || defined(_M_ARM64EC) +#if !defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) && \ + (defined(_M_ARM64) || defined(_M_ARM64EC)) #define MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64 #endif From d879b47b527e11569678c0895f9bdab66eae5e20 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 30 Nov 2023 09:35:14 +0000 Subject: [PATCH 09/11] tidy up macros in mbedtls_xor Signed-off-by: Dave Rodgman --- library/common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/common.h b/library/common.h index 55bea8ceee77..e532777e78e6 100644 --- a/library/common.h +++ b/library/common.h @@ -183,8 +183,8 @@ inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned size_t i = 0; #if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) #if defined(MBEDTLS_HAVE_NEON_INTRINSICS) && \ - (!defined(MBEDTLS_COMPILER_IS_GCC) || \ - (defined(MBEDTLS_COMPILER_IS_GCC) && MBEDTLS_GCC_VERSION >= 70300)) + (!(defined(MBEDTLS_COMPILER_IS_GCC) && MBEDTLS_GCC_VERSION < 70300)) + /* Old GCC versions generate a warning here, so disable the NEON path for these compilers */ for (; (i + 16) <= n; i += 16) { uint8x16_t v1 = vld1q_u8(a + i); uint8x16_t v2 = vld1q_u8(b + i); From 12d1c3ad4fe58f056af1d332e4f0cc9cc672eca0 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 30 Nov 2023 09:38:38 +0000 Subject: [PATCH 10/11] Use MBEDTLS_HAVE_NEON_INTRINSICS in aesce Signed-off-by: Dave Rodgman --- library/aesce.c | 2 +- library/aesce.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/library/aesce.c b/library/aesce.c index 2879be5a181f..afbb369eb51b 100644 --- a/library/aesce.c +++ b/library/aesce.c @@ -45,7 +45,7 @@ #include "aesce.h" -#if defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON) +#if defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(MBEDTLS_HAVE_NEON_INTRINSICS) /* Compiler version checks. */ #if defined(__clang__) diff --git a/library/aesce.h b/library/aesce.h index cf12d7f8d178..6b64f45d0ac0 100644 --- a/library/aesce.h +++ b/library/aesce.h @@ -15,11 +15,13 @@ #define MBEDTLS_AESCE_H #include "mbedtls/build_info.h" +#include "common.h" #include "mbedtls/aes.h" -#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON) +#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARMV8_A) && \ + defined(MBEDTLS_HAVE_NEON_INTRINSICS) #define MBEDTLS_AESCE_HAVE_CODE @@ -124,6 +126,6 @@ int mbedtls_aesce_setkey_enc(unsigned char *rk, #error "AES hardware acceleration not supported on this platform" #endif -#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARMV8_A && __ARM_NEON */ +#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARMV8_A && MBEDTLS_HAVE_NEON_INTRINSICS */ #endif /* MBEDTLS_AESCE_H */ From 059f66ce7c8a40589718ad98502e6fb542a2a7bf Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 30 Nov 2023 11:02:03 +0000 Subject: [PATCH 11/11] Remove redundant check Signed-off-by: Dave Rodgman --- library/aesce.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/library/aesce.c b/library/aesce.c index 17f09aa556b1..eaaa5b5c3f24 100644 --- a/library/aesce.c +++ b/library/aesce.c @@ -76,10 +76,6 @@ # endif #endif -#if !defined(MBEDTLS_HAVE_NEON_INTRINSICS) -#error "Target does not support NEON instructions" -#endif - #if !(defined(__ARM_FEATURE_CRYPTO) || defined(__ARM_FEATURE_AES)) || \ defined(MBEDTLS_ENABLE_ARM_CRYPTO_EXTENSIONS_COMPILER_FLAG) # if defined(__ARMCOMPILER_VERSION)