Skip to content

Commit

Permalink
Normalize floating point environment on startup
Browse files Browse the repository at this point in the history
In #38419 we noticed that on AArch64 Darwin, processes start with the
FZ and DN floating point flags set. To maintain consistency across platforms,
reset these flags whenever we start julia. Note that default_nans isn't available
on x86, so while we do have julia-level wrappers for the fz flag, I'm not
adding any for the dn flag. The function is exported, so if somebody really
needs it in some aarch64-specific code, they can ccall it.
  • Loading branch information
Keno committed Nov 13, 2020
1 parent 2c9160c commit a940fc5
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 1 deletion.
8 changes: 8 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,13 @@ static void jl_set_io_wait(int v)

extern jl_mutex_t jl_modules_mutex;

// Put the floating point environment into a known state at startup by
// turning off both the zero-subnormals and the default-NaN modes.
// Either setter returning nonzero is treated as a fatal configuration error.
static void restore_fp_env(void)
{
    // Evaluate the setters in order and stop at the first failure, so the
    // default-NaN setter is only called when the subnormal setter succeeded.
    int failed = jl_set_zero_subnormals(0) != 0;
    if (!failed)
        failed = jl_set_default_nans(0) != 0;

    if (failed)
        jl_error("Failed to configure floating point environment");
}

void _julia_init(JL_IMAGE_SEARCH rel)
{
jl_init_timing();
Expand All @@ -634,6 +641,7 @@ void _julia_init(JL_IMAGE_SEARCH rel)
// best to call this first, since it also initializes libuv
jl_init_uv();
init_stdio();
restore_fp_env();
restore_signals();

jl_page_size = jl_getpagesize();
Expand Down
4 changes: 4 additions & 0 deletions src/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
// For debugging only
JL_DLLEXPORT void jl_dump_host_cpu(void);

// Floating point environment controls.
// The setters return 0 on success and nonzero on failure (for example when
// the requested mode cannot be enabled on this platform); the getters return
// nonzero when the corresponding mode is currently enabled.

// Enable (isZero != 0) or disable (isZero == 0) treating subnormals as zero.
JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero);
JL_DLLEXPORT int32_t jl_get_zero_subnormals(void);
// Enable (isDefault != 0) or disable (isDefault == 0) default-NaN mode.
JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault);
JL_DLLEXPORT int32_t jl_get_default_nans(void);
#ifdef __cplusplus
}

Expand Down
27 changes: 26 additions & 1 deletion src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1808,8 +1808,10 @@ extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
}

#ifdef _CPU_AARCH64_
// FPCR FZ, bit [24]: mask used by the zero-subnormals getter/setter
static constexpr uint32_t fpcr_fz_mask = 1 << 24;
// FPCR DN, bit [25]: mask used by the default-NaN getter/setter
static constexpr uint32_t fpcr_dn_mask = 1 << 25;

static inline uint32_t get_fpcr_aarch64(void)
{
Expand All @@ -1835,6 +1837,19 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
set_fpcr_aarch64(fpcr);
return 0;
}

// Report whether default-NaN mode is enabled.
// Returns 1 when the FPCR DN bit is set and 0 when it is clear.
extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
{
    // Test the DN bit (bit 25), not the FZ bit: the flush-to-zero state is
    // reported separately by jl_get_zero_subnormals.
    return (get_fpcr_aarch64() & fpcr_dn_mask) != 0;
}

// Enable (isDefault != 0) or disable (isDefault == 0) default-NaN mode by
// rewriting the DN bit of FPCR. All other FPCR bits are left as read.
// Always returns 0.
extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
{
    const uint32_t current = get_fpcr_aarch64();
    uint32_t updated;
    if (isDefault) {
        updated = current | fpcr_dn_mask;
    }
    else {
        updated = current & ~fpcr_dn_mask;
    }
    set_fpcr_aarch64(updated);
    return 0;
}
#else
extern "C" JL_DLLEXPORT int32_t jl_get_zero_subnormals(void)
{
Expand All @@ -1845,4 +1860,14 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
{
return isZero;
}

// Stub for ARM builds without _CPU_AARCH64_: this code path does not manage
// a default-NaN flag, so the mode is always reported as disabled (0).
extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
{
return 0;
}

// Stub for ARM builds without _CPU_AARCH64_: nothing is changed here.
// The request is echoed back as the status code, so asking to disable
// default-NaN mode (0) reports success and asking to enable it reports
// a nonzero failure.
extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
{
    const int32_t status = isDefault;
    return status;
}
#endif
10 changes: 10 additions & 0 deletions src/processor_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,13 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
{
return isZero;
}

// Fallback implementation: no default-NaN flag is managed here, so the mode
// is always reported as disabled (0).
extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
{
return 0;
}

// Fallback implementation of the default-NaN setter: nothing is changed.
// The request is echoed back as the status code, so asking to disable
// default-NaN mode (0) reports success and asking to enable it reports a
// nonzero failure.
//
// This must be named jl_set_default_nans: jl_set_zero_subnormals is already
// defined in this file, and processor.h declares jl_set_default_nans, which
// startup code calls.
extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
{
    return isDefault;
}
11 changes: 11 additions & 0 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1102,3 +1102,14 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
return isZero;
}
}

// X86 does not support default NaNs, so the mode is always reported as
// disabled (0).
extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
{
return 0;
}

// x86 stub for the default-NaN setter: nothing is changed. The request is
// echoed back as the status code, so asking to disable default-NaN mode (0)
// reports success and asking to enable it reports a nonzero failure.
extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
{
    const int32_t status = isDefault;
    return status;
}

0 comments on commit a940fc5

Please sign in to comment.