Skip to content

Commit

Permalink
Optimize TLS access in C code on Linux x86 and x64
Browse files Browse the repository at this point in the history
* Use indirect function (`ifunc`) to set the address of a symbol at runtime.
* Use `weak` symbol to detect (in `ifunc`) whether the static version of
  TLS is available.
  • Loading branch information
yuyichao committed Jun 30, 2016
1 parent c23687f commit 840efd5
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 6 deletions.
83 changes: 78 additions & 5 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ extern "C" {
#include "threadgroup.h"
#include "threading.h"

// The tls_states buffer:
//
// On platforms that do not use ELF (i.e. where `__thread` is emulated with
// lower level API) (Mac, Windows), we use the platform runtime API to create
// TLS variable directly.
// This is functionally equivalent to using `__thread` but can be
// more efficient since we can have better control over the creation and
// initialization of the TLS buffer.
//
// On platforms that use ELF (Linux, FreeBSD), we use a `__thread` variable.
// as the fallback in the shared object. For better efficiency, we also
// create a `__thread` variable in the main executable using a static TLS
// model.
#ifdef JULIA_ENABLE_THREADING
# if defined(_OS_DARWIN_)
// Mac doesn't seem to have static TLS model so the runtime TLS getter
Expand Down Expand Up @@ -117,12 +130,40 @@ jl_get_ptls_states_func jl_get_ptls_states_getter(void)
return &jl_get_ptls_states;
}
# else
// We use the faster static version in the main executable to replace
// the slower version in the shared object. The code in different libraries
// or executable, however, has to agree on which version to use.
// The general solution is to add one more indirection in the C entry point
// (see `jl_get_ptls_states_wrapper`).
//
// When `ifunc` is availabe, we can use it to trick the linker to use the
// real address (`jl_get_ptls_states_static`) directly as the symbol address.
// (see `jl_get_ptls_states_resolve`).
//
// However, since the detection of the static version in `ifunc`
// is not guaranteed to be reliable, we still need to fallback to the wrapper
// version as the symbol address if we didn't find the static version in `ifunc`.
#if defined(__GLIBC__) && (defined(_CPU_X86_64_) || defined(_CPU_X86_))
# if __GLIBC_PREREQ(2, 12)
# define JL_TLS_USE_IFUNC
# endif
#endif
// Disable ifunc on clang <= 3.8 since it is not supported
#if defined(JL_TLS_USE_IFUNC) && defined(__clang__)
# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 8)
# undef JL_TLS_USE_IFUNC
# endif
#endif
// fallback provided for embedding
static JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_fallback(void)
{
static __thread jl_tls_states_t tls_states;
return &tls_states;
}
#ifdef JL_TLS_USE_IFUNC
JL_DLLEXPORT JL_CONST_FUNC __attribute__((weak))
jl_tls_states_t *jl_get_ptls_states_static(void);
#endif
static jl_tls_states_t *jl_get_ptls_states_init(void);
static jl_get_ptls_states_func jl_tls_states_cb = jl_get_ptls_states_init;
static jl_tls_states_t *jl_get_ptls_states_init(void)
Expand All @@ -135,25 +176,57 @@ static jl_tls_states_t *jl_get_ptls_states_init(void)
// This is clearly not thread safe but should be fine since we
// make sure the tls states callback is finalized before adding
// multiple threads
jl_tls_states_cb = jl_get_ptls_states_fallback;
return jl_get_ptls_states_fallback();
jl_get_ptls_states_func cb = jl_get_ptls_states_fallback;
#ifdef JL_TLS_USE_IFUNC
if (jl_get_ptls_states_static)
cb = jl_get_ptls_states_static;
#endif
jl_tls_states_cb = cb;
return cb();
}
JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *(jl_get_ptls_states)(void)

static JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_wrapper(void)
{
return (*jl_tls_states_cb)();
}

JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f)
{
if (f == jl_tls_states_cb || !f)
return;
// only allow setting this once
if (f && f != jl_get_ptls_states_init &&
jl_tls_states_cb == jl_get_ptls_states_init) {
if (jl_tls_states_cb == jl_get_ptls_states_init) {
jl_tls_states_cb = f;
}
else {
jl_safe_printf("ERROR: Attempt to change TLS address.\n");
exit(1);
}
}

#ifdef JL_TLS_USE_IFUNC
static jl_get_ptls_states_func jl_get_ptls_states_resolve(void)
{
if (jl_tls_states_cb != jl_get_ptls_states_init)
return jl_tls_states_cb;
// If we can't find the static version, return the wrapper instead
// of the slow version so that we won't resolve to the slow version
// due to issues in the relocation order.
// This may not be necessary once
if (!jl_get_ptls_states_static)
return jl_get_ptls_states_wrapper;
jl_tls_states_cb = jl_get_ptls_states_static;
return jl_tls_states_cb;
}

JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *(jl_get_ptls_states)(void)
__attribute__((ifunc ("jl_get_ptls_states_resolve")));
#else // JL_TLS_USE_IFUNC
JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *(jl_get_ptls_states)(void)
{
return jl_get_ptls_states_wrapper();
}
#endif // JL_TLS_USE_IFUNC
jl_get_ptls_states_func jl_get_ptls_states_getter(void)
{
if (jl_tls_states_cb == jl_get_ptls_states_init)
Expand Down
2 changes: 1 addition & 1 deletion ui/repl.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ extern "C" {
#endif

#if defined(JULIA_ENABLE_THREADING) && !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
static JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_static(void)
JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *jl_get_ptls_states_static(void)
{
static __attribute__((tls_model("local-exec"))) __thread jl_tls_states_t tls_states;
return &tls_states;
Expand Down

0 comments on commit 840efd5

Please sign in to comment.