Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Yet another) Optimization for TLS access #17220

Merged
merged 1 commit into from
Jul 1, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1235,7 +1235,15 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx)
emit_signal_fence();
return ghostValue(jl_void_type);
}
if (fptr == (void(*)(void))&jl_get_ptls_states ||
#ifdef _OS_LINUX_
// Directly accessing the address of an ifunc can cause linker issues on
// some configurations (e.g. AArch64 + -Bsymbolic-functions).
static const auto ptls_getter = jl_dlsym_e(jl_dlopen(nullptr, 0),
"jl_get_ptls_states");
#else
static const auto ptls_getter = &jl_get_ptls_states;
#endif
if (fptr == (void(*)(void))(uintptr_t)ptls_getter ||
((!f_lib || (intptr_t)f_lib == 2) && f_name &&
strcmp(f_name, "jl_get_ptls_states") == 0)) {
assert(lrt == T_pint8);
Expand Down
86 changes: 81 additions & 5 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ extern "C" {
#include "threadgroup.h"
#include "threading.h"

// The tls_states buffer:
//
// On platforms that do not use ELF (i.e. where `__thread` is emulated with
// lower level API) (Mac, Windows), we use the platform runtime API to create
// TLS variable directly.
// This is functionally equivalent to using `__thread` but can be
// more efficient since we can have better control over the creation and
// initialization of the TLS buffer.
//
// On platforms that use ELF (Linux, FreeBSD), we use a `__thread` variable
// as the fallback in the shared object. For better efficiency, we also
// create a `__thread` variable in the main executable using a static TLS
// model.
#ifdef JULIA_ENABLE_THREADING
# if defined(_OS_DARWIN_)
// Mac doesn't seem to have static TLS model so the runtime TLS getter
Expand Down Expand Up @@ -117,12 +130,43 @@ jl_get_ptls_states_func jl_get_ptls_states_getter(void)
return &jl_get_ptls_states;
}
# else
// We use the faster static version in the main executable to replace
// the slower version in the shared object. The code in different libraries
// or executables, however, has to agree on which version to use.
// The general solution is to add one more indirection in the C entry point
// (see `jl_get_ptls_states_wrapper`).
//
// When `ifunc` is available, we can use it to trick the linker into using the
// real address (`jl_get_ptls_states_static`) directly as the symbol address
// (see `jl_get_ptls_states_resolve`).
//
// However, since the detection of the static version in `ifunc`
// is not guaranteed to be reliable, we still need to fall back to the wrapper
// version as the symbol address if we didn't find the static version in `ifunc`.
// JL_TLS_USE_IFUNC ends up defined only when building against glibc >= 2.12
// on one of the CPU families listed below, and not with clang <= 3.8.
// It selects the `ifunc`-based definition of `jl_get_ptls_states` over the
// plain one that calls the wrapper.
#if defined(__GLIBC__) && (defined(_CPU_X86_64_) || defined(_CPU_X86_) || \
defined(_CPU_AARCH64_) || defined(_CPU_ARM_))
// Only enable this on architectures that are tested.
// For example, GCC doesn't seem to support the `ifunc` attribute on power yet.
# if __GLIBC_PREREQ(2, 12)
# define JL_TLS_USE_IFUNC
# endif
#endif
// Disable ifunc on clang <= 3.8 since it is not supported
#if defined(JL_TLS_USE_IFUNC) && defined(__clang__)
# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 8)
# undef JL_TLS_USE_IFUNC
# endif
#endif
// fallback provided for embedding
static JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_fallback(void)
{
    // The buffer is a function-local `__thread` variable, so the address
    // handed back refers to the calling thread's own instance.
    static __thread jl_tls_states_t tls_states;
    jl_tls_states_t *ptls = &tls_states;
    return ptls;
}
#ifdef JL_TLS_USE_IFUNC
JL_DLLEXPORT JL_CONST_FUNC __attribute__((weak))
jl_tls_states_t *jl_get_ptls_states_static(void);
#endif
static jl_tls_states_t *jl_get_ptls_states_init(void);
static jl_get_ptls_states_func jl_tls_states_cb = jl_get_ptls_states_init;
static jl_tls_states_t *jl_get_ptls_states_init(void)
Expand All @@ -135,25 +179,57 @@ static jl_tls_states_t *jl_get_ptls_states_init(void)
// This is clearly not thread safe but should be fine since we
// make sure the tls states callback is finalized before adding
// multiple threads
jl_tls_states_cb = jl_get_ptls_states_fallback;
return jl_get_ptls_states_fallback();
jl_get_ptls_states_func cb = jl_get_ptls_states_fallback;
#ifdef JL_TLS_USE_IFUNC
if (jl_get_ptls_states_static)
cb = jl_get_ptls_states_static;
#endif
jl_tls_states_cb = cb;
return cb();
}
JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *(jl_get_ptls_states)(void)

static JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_wrapper(void)
{
    // One extra indirection: dispatch through whichever getter is currently
    // stored in `jl_tls_states_cb`.
    return jl_tls_states_cb();
}

JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f)
{
if (f == jl_tls_states_cb || !f)
return;
// only allow setting this once
if (f && f != jl_get_ptls_states_init &&
jl_tls_states_cb == jl_get_ptls_states_init) {
if (jl_tls_states_cb == jl_get_ptls_states_init) {
jl_tls_states_cb = f;
}
else {
jl_safe_printf("ERROR: Attempt to change TLS address.\n");
exit(1);
}
}

#ifdef JL_TLS_USE_IFUNC
// `ifunc` resolver for `jl_get_ptls_states`: returns the function whose
// address should be used for that symbol.
static jl_get_ptls_states_func jl_get_ptls_states_resolve(void)
{
    if (jl_tls_states_cb == jl_get_ptls_states_init) {
        // No getter has been installed yet.
        if (jl_get_ptls_states_static) {
            // The weak static version is present: install it as the callback.
            jl_tls_states_cb = jl_get_ptls_states_static;
        }
        else {
            // If we can't find the static version, return the wrapper instead
            // of the slow version so that we won't resolve to the slow version
            // due to issues in the relocation order.
            // This may not be necessary once `ifunc` support in glibc is more
            // mature.
            return jl_get_ptls_states_wrapper;
        }
    }
    // Either a getter was already installed, or the static one was just set.
    return jl_tls_states_cb;
}

// Exported entry point when JL_TLS_USE_IFUNC is defined: the `ifunc`
// attribute names `jl_get_ptls_states_resolve` as the resolver that supplies
// the implementation address for this symbol.
JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *(jl_get_ptls_states)(void)
__attribute__((ifunc ("jl_get_ptls_states_resolve")));
#else // JL_TLS_USE_IFUNC
// Exported entry point when JL_TLS_USE_IFUNC is not defined: forwards every
// call to `jl_get_ptls_states_wrapper` and returns its result.
JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *(jl_get_ptls_states)(void)
{
return jl_get_ptls_states_wrapper();
}
#endif // JL_TLS_USE_IFUNC
jl_get_ptls_states_func jl_get_ptls_states_getter(void)
{
if (jl_tls_states_cb == jl_get_ptls_states_init)
Expand Down
16 changes: 7 additions & 9 deletions ui/repl.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,17 @@ extern "C" {
#endif

#if defined(JULIA_ENABLE_THREADING) && !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
static JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_static(void)
JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_static(void)
{
static __attribute__((tls_model("local-exec"))) __thread jl_tls_states_t tls_states;
return &tls_states;
}
// Registers `jl_get_ptls_states_static` as the TLS states getter by passing
// it to `jl_set_ptls_states_getter`. The `constructor` attribute requests
// that this run automatically at start-up instead of being called explicitly.
__attribute__((constructor)) void jl_register_ptls_states_getter(void)
{
// We need to make sure this function is called before any reference to
// TLS variables.
jl_set_ptls_states_getter(jl_get_ptls_states_static);
}
#endif

static int lisp_prompt = 0;
Expand Down Expand Up @@ -655,14 +661,6 @@ int wmain(int argc, wchar_t *argv[], wchar_t *envp[])
if (!WideCharToMultiByte(CP_UTF8, 0, warg, -1, arg, len, NULL, NULL)) return 1;
argv[i] = (wchar_t*)arg;
}
#endif
#if defined(JULIA_ENABLE_THREADING) && !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
// We need to make sure this function is called before any reference to
// TLS variables. Since the compiler is free to move calls to
// `jl_get_ptls_states()` around, we should avoid referencing TLS
// variables in this function. (Mark `true_main` as noinline for this
// reason).
jl_set_ptls_states_getter(jl_get_ptls_states_static);
#endif
libsupport_init();
parse_opts(&argc, (char***)&argv);
Expand Down