diff --git a/src/array.c b/src/array.c
index 3d660dc78594b9..dd57c09edced48 100644
--- a/src/array.c
+++ b/src/array.c
@@ -20,6 +20,34 @@ extern "C" {
 #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
 
+// this is a version of memmove that preserves atomic memory ordering,
+// which makes it safe to use for objects that can contain memory references
+// without risk of creating pointers out of thin air
+// TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32
+//       aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit)
+void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
+{
+    size_t i;
+    if (dstp < srcp || dstp > srcp + n) {
+        for (i = 0; i < n; i++) {
+            jl_atomic_store_relaxed(dstp + i, jl_atomic_load_relaxed(srcp + i));
+        }
+    }
+    else {
+        for (i = 0; i < n; i++) {
+            jl_atomic_store_relaxed(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1));
+        }
+    }
+}
+
+void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
+{
+    if (hasptr)
+        memmove_refs((void**)dst, (void**)src, nb / sizeof(void*));
+    else
+        memmove(dst, src, nb);
+}
+
 // array constructors ---------------------------------------------------------
 
 char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT
 {
@@ -542,10 +570,9 @@ JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t
 {
     assert(i < jl_array_len(a));
     assert(a->flags.ptrarray);
-    jl_value_t *elt = ((jl_value_t**)a->data)[i];
-    if (elt == NULL) {
+    jl_value_t *elt = jl_atomic_load_relaxed(((jl_value_t**)a->data) + i);
+    if (elt == NULL)
         jl_throw(jl_undefref_exception);
-    }
     return elt;
 }
 
@@ -569,7 +596,7 @@ JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
 JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i)
 {
     if (a->flags.ptrarray) {
-        return ((jl_value_t**)jl_array_data(a))[i] != NULL;
+        return jl_atomic_load_relaxed(((jl_value_t**)jl_array_data(a)) + i) != NULL;
     }
     else if (a->flags.hasptr) {
         jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a));
@@ -600,12 +627,17 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
             if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
                 return;
         }
-        jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
+        if (a->flags.hasptr) {
+            memmove_refs((void**)&((char*)a->data)[i * a->elsize], (void**)rhs, a->elsize / sizeof(void*));
+        }
+        else {
+            jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
+        }
         if (a->flags.hasptr)
             jl_gc_multi_wb(jl_array_owner(a), rhs);
     }
     else {
-        ((jl_value_t**)a->data)[i] = rhs;
+        jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, rhs);
         jl_gc_wb(jl_array_owner(a), rhs);
     }
 }
@@ -615,7 +647,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
     if (i >= jl_array_len(a))
         jl_bounds_error_int((jl_value_t*)a, i + 1);
     if (a->flags.ptrarray)
-        ((jl_value_t**)a->data)[i] = NULL;
+        jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, NULL);
     else if (a->flags.hasptr) {
         size_t elsize = a->elsize;
         jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
@@ -762,7 +794,7 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
         if (isbitsunion) newtypetagdata = typetagdata - inc;
         if (idx > 0) {
             // inserting new elements after 1st element
-            memmove(newdata, data, idx * elsz);
+            memmove_safe(a->flags.hasptr, newdata, data, idx * elsz);
             if (isbitsunion) {
                 memmove(newtypetagdata, typetagdata, idx);
                 memset(newtypetagdata + idx, 0, inc);
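Taken in isolation, memmove_refs is just an element-wise memmove over pointer-sized slots: each slot is loaded and stored with relaxed atomics, so a concurrent reader can observe a stale pointer but never a torn one. The following self-contained C11 sketch shows the same shape using standard atomics; the names are illustrative, and jl_atomic_load_relaxed/jl_atomic_store_relaxed correspond to atomic_load_explicit/atomic_store_explicit with memory_order_relaxed.

#include <stdatomic.h>
#include <stddef.h>

typedef _Atomic(void *) atomic_slot_t;

/* element-wise memmove over pointer slots; never tears an individual pointer */
void copy_slots(atomic_slot_t *dst, atomic_slot_t *src, size_t n)
{
    size_t i;
    if (dst < src || dst > src + n) {
        /* regions don't overlap hazardously: copy forward */
        for (i = 0; i < n; i++)
            atomic_store_explicit(&dst[i],
                    atomic_load_explicit(&src[i], memory_order_relaxed),
                    memory_order_relaxed);
    }
    else {
        /* destination overlaps the source's tail: copy backward */
        for (i = n; i > 0; i--)
            atomic_store_explicit(&dst[i - 1],
                    atomic_load_explicit(&src[i - 1], memory_order_relaxed),
                    memory_order_relaxed);
    }
}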
@@ -796,11 +828,11 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
             // We could use memcpy if resizing allocates a new buffer,
             // hopefully it's not a particularly important optimization.
             if (idx > 0 && newdata < data) {
-                memmove(newdata, data, nb1);
+                memmove_safe(a->flags.hasptr, newdata, data, nb1);
             }
-            memmove(newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
+            memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
             if (idx > 0 && newdata > data) {
-                memmove(newdata, data, nb1);
+                memmove_safe(a->flags.hasptr, newdata, data, nb1);
             }
             a->offset = newoffset;
         }
@@ -810,16 +842,16 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
         newdata = data - oldoffsnb + a->offset * elsz;
         if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset;
         if (idx > 0 && newdata < data) {
-            memmove(newdata, data, nb1);
+            memmove_safe(a->flags.hasptr, newdata, data, nb1);
             if (isbitsunion) {
                 memmove(newtypetagdata, typetagdata, idx);
                 memset(newtypetagdata + idx, 0, inc);
             }
         }
-        memmove(newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
+        memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
         if (isbitsunion) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx);
         if (idx > 0 && newdata > data) {
-            memmove(newdata, data, nb1);
+            memmove_safe(a->flags.hasptr, newdata, data, nb1);
             if (isbitsunion) {
                 memmove(newtypetagdata, typetagdata, idx);
                 memset(newtypetagdata + idx, 0, inc);
@@ -891,7 +923,7 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
             memmove(newtypetagdata, typetagdata, idx);
             memset(newtypetagdata + idx, 0, inc);
         }
-        if (has_gap) memmove(newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1);
+        if (has_gap) memmove_safe(a->flags.hasptr, newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1);
     }
     a->data = data = newdata;
 }
@@ -901,7 +933,7 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
             memset(typetagdata + idx, 0, inc);
         }
         size_t nb1 = idx * elsz;
-        memmove(data + nb1 + inc * elsz, data + nb1, n * elsz - nb1);
+        memmove_safe(a->flags.hasptr, data + nb1 + inc * elsz, data + nb1, n * elsz - nb1);
     }
     else {
         // there was enough room for requested growth already in a->maxsize
@@ -1036,12 +1068,12 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
     if (elsz == 1 && !isbitsunion)
         nbtotal++;
     if (idx > 0) {
-        memmove(newdata, olddata, nb1);
+        memmove_safe(a->flags.hasptr, newdata, olddata, nb1);
         if (isbitsunion) memmove(newtypetagdata, typetagdata, idx);
     }
     // Move the rest of the data if the offset changed
     if (newoffs != offset) {
-        memmove(newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
+        memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
         if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, n - idx);
     }
     a->data = newdata;
@@ -1063,7 +1095,7 @@ STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec,
     int isbitsunion = jl_array_isbitsunion(a);
     size_t last = idx + dec;
     if (n > last) {
-        memmove(data + idx * elsz, data + last * elsz, (n - last) * elsz);
+        memmove_safe(a->flags.hasptr, data + idx * elsz, data + last * elsz, (n - last) * elsz);
         if (isbitsunion) {
             char *typetagdata = jl_array_typetagdata(a);
             memmove(typetagdata + idx, typetagdata + last, n - last);
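Every grow and delete path above makes the same substitution: a raw memmove of element bytes becomes memmove_safe, which walks pointer-sized slots when the element type embeds references (hasptr) and falls back to plain memmove otherwise. A minimal sketch of that dispatch, reusing the copy_slots helper from the earlier note; shift_tail and the helper prototype are illustrative, not patch code.

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>
#include <string.h>

typedef _Atomic(void *) atomic_slot_t;
void copy_slots(atomic_slot_t *dst, atomic_slot_t *src, size_t n); /* see earlier sketch */

/* open a gap of `inc` elements at index `idx`, as in the grow-at-end hunk */
void shift_tail(int hasptr, char *data, size_t idx, size_t inc,
                size_t n, size_t elsz)
{
    size_t nb1 = idx * elsz;      /* bytes before the gap */
    size_t nb2 = n * elsz - nb1;  /* bytes that move right */
    if (hasptr) {
        /* pointer-bearing elements are always slot-sized and slot-aligned */
        assert(elsz % sizeof(void*) == 0);
        copy_slots((atomic_slot_t *)(data + nb1 + inc * elsz),
                   (atomic_slot_t *)(data + nb1), nb2 / sizeof(void*));
    }
    else {
        memmove(data + nb1 + inc * elsz, data + nb1, nb2);
    }
}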
@@ -1161,14 +1193,14 @@ JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
 }
 
 // Copy element by element until we hit a young object, at which point
-// we can continue using `memmove`.
+// we can finish by using `memmove`.
 static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner,
                                                   void **src_p, void **dest_p,
                                                   ssize_t n)
 {
     for (ssize_t i = 0; i < n; i++) {
-        void *val = src_p[i];
-        dest_p[i] = val;
+        void *val = jl_atomic_load_relaxed(src_p + i);
+        jl_atomic_store_relaxed(dest_p + i, val);
         // `val` is young or old-unmarked
         if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
             jl_gc_queue_root(owner);
@@ -1183,8 +1215,8 @@ static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner,
                                                    ssize_t n)
 {
     for (ssize_t i = 0; i < n; i++) {
-        void *val = src_p[n - i - 1];
-        dest_p[n - i - 1] = val;
+        void *val = jl_atomic_load_relaxed(src_p + n - i - 1);
+        jl_atomic_store_relaxed(dest_p + n - i - 1, val);
         // `val` is young or old-unmarked
         if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
             jl_gc_queue_root(owner);
@@ -1218,7 +1250,7 @@ JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p,
             n -= done;
         }
     }
-    memmove(dest_p, src_p, n * sizeof(void*));
+    memmove_refs(dest_p, src_p, n);
 }
 
 JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item)
diff --git a/src/atomics.h b/src/atomics.h
index c5d8b579d140d5..0af087038da034 100644
--- a/src/atomics.h
+++ b/src/atomics.h
@@ -44,6 +44,8 @@
  *   specified.
  */
 #if defined(__GNUC__)
+#  define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#  define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
 #  define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
 #  define jl_atomic_fetch_add_relaxed(obj, arg) \
     __atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
@@ -96,6 +98,9 @@
 #  define jl_atomic_load_relaxed(obj) \
     __atomic_load_n(obj, __ATOMIC_RELAXED)
 #elif defined(_COMPILER_MICROSOFT_)
+// TODO: these only define compiler barriers, and aren't correct outside of x86
+#  define jl_fence() _ReadWriteBarrier()
+#  define jl_fence_release() _WriteBarrier()
 #  define jl_signal_fence() _ReadWriteBarrier()
 
 // add
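On the GCC-compatible branch the two new fences map directly onto C11 thread fences. A typical pairing, shown as an illustrative standalone sketch rather than patch code: plain stores initialize an object, a release fence orders them, and a relaxed store publishes the pointer, which is the idiom the relaxed loads elsewhere in this patch are designed to meet.

#include <stdatomic.h>

/* C11 equivalents of the new macros on the __GNUC__ branch */
static inline void fence(void)         { atomic_thread_fence(memory_order_seq_cst); }
static inline void fence_release(void) { atomic_thread_fence(memory_order_release); }

typedef struct { int a, b; } obj_t;
static obj_t storage;
static _Atomic(obj_t *) shared;

void publish(void)
{
    storage.a = 1;   /* plain initialization ...                */
    storage.b = 2;
    fence_release(); /* ... ordered before the publishing store */
    atomic_store_explicit(&shared, &storage, memory_order_relaxed);
}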
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 4a37228d28528f..c68db711e341fd 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1379,26 +1379,32 @@ static void typed_store(jl_codectx_t &ctx,
     if (type_is_ghost(elty))
         return;
     Value *r;
-    if (!isboxed) {
+    if (!isboxed)
         r = emit_unbox(ctx, elty, rhs, jltype);
-        if (parent != NULL)
-            emit_write_multibarrier(ctx, parent, r);
-    }
-    else {
+    else
         r = maybe_decay_untracked(boxed(ctx, rhs));
-        if (parent != NULL)
-            emit_write_barrier(ctx, parent, r);
-    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     if (ptr->getType() != ptrty)
         ptr = ctx.builder.CreateBitCast(ptr, ptrty);
     if (idx_0based)
         ptr = ctx.builder.CreateInBoundsGEP(r->getType(), ptr, idx_0based);
-    Instruction *store = ctx.builder.CreateAlignedStore(r, ptr, isboxed || alignment ? alignment : julia_alignment(jltype));
+    if (isboxed)
+        alignment = sizeof(void*);
+    else if (!alignment)
+        alignment = julia_alignment(jltype);
+    StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, alignment);
+    if (isboxed) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+        store->setOrdering(AtomicOrdering::Unordered);
     if (aliasscope)
         store->setMetadata("noalias", aliasscope);
     if (tbaa)
         tbaa_decorate(tbaa, store);
+    if (parent != NULL) {
+        if (!isboxed)
+            emit_write_multibarrier(ctx, parent, r);
+        else
+            emit_write_barrier(ctx, parent, r);
+    }
 }
 
 // --- convert boolean value to julia ---
@@ -2611,8 +2617,10 @@ static void emit_setfield(jl_codectx_t &ctx,
         jl_value_t *jfty = jl_svecref(sty->types, idx0);
         if (jl_field_isptr(sty, idx0)) {
             Value *r = maybe_decay_untracked(boxed(ctx, rhs)); // don't need a temporary gcroot since it'll be rooted by strct
-            tbaa_decorate(strct.tbaa, ctx.builder.CreateStore(r,
-                emit_bitcast(ctx, addr, T_pprjlvalue)));
+            cast<StoreInst>(tbaa_decorate(strct.tbaa, ctx.builder.CreateAlignedStore(r,
+                    emit_bitcast(ctx, addr, T_pprjlvalue),
+                    sizeof(jl_value_t*))))
+                ->setOrdering(AtomicOrdering::Unordered);
             if (wb && strct.isboxed)
                 emit_write_barrier(ctx, boxed(ctx, strct), r);
         }
@@ -2706,7 +2714,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 if (jl_field_isptr(sty, i)) {
                     fval = boxed(ctx, fval_info);
                     if (!init_as_value)
-                        tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedStore(fval, dest, jl_field_align(sty, i)));
+                        cast<StoreInst>(tbaa_decorate(tbaa_stack,
+                                ctx.builder.CreateAlignedStore(fval, dest, jl_field_align(sty, i))))
+                            ->setOrdering(AtomicOrdering::Unordered);
                 }
                 else if (jl_is_uniontype(jtype)) {
                     // compute tindex from rhs
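LLVM's `unordered` atomic ordering has no exact C spelling; the closest portable analogue of the stores codegen now emits for GC-visible pointer fields is a relaxed atomic store. An illustrative C sketch of the guarantee being bought, with hypothetical types rather than patch code: the compiler may no longer tear, invent, or duplicate the pointer store, so a racing reader sees either the old or the new value.

#include <stdatomic.h>

typedef struct {
    _Atomic(void *) contents; /* GC-visible pointer field, may be read concurrently */
    long plain;               /* never accessed concurrently; a plain field is fine */
} box_t;

void set_box(box_t *b, void *v)
{
    atomic_store_explicit(&b->contents, v, memory_order_relaxed);
}

void *get_box(box_t *b)
{
    return atomic_load_explicit(&b->contents, memory_order_relaxed);
}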
diff --git a/src/datatype.c b/src/datatype.c
index d22551895d99a9..9f4e2429fc3138 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -1011,7 +1011,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i)
     assert(i < jl_datatype_nfields(st));
     size_t offs = jl_field_offset(st, i);
     if (jl_field_isptr(st, i)) {
-        return *(jl_value_t**)((char*)v + offs);
+        return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
     }
     jl_value_t *ty = jl_field_type(st, i);
     if (jl_is_uniontype(ty)) {
@@ -1029,7 +1029,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_RO
     assert(i < jl_datatype_nfields(st));
     size_t offs = jl_field_offset(st,i);
     assert(jl_field_isptr(st,i));
-    return *(jl_value_t**)((char*)v + offs);
+    return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
@@ -1039,7 +1039,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
         jl_bounds_error_int(v, i + 1);
     size_t offs = jl_field_offset(st, i);
     if (jl_field_isptr(st, i)) {
-        jl_value_t *fval = *(jl_value_t**)((char*)v + offs);
+        jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
         if (__unlikely(fval == NULL))
             jl_throw(jl_undefref_exception);
         return fval;
@@ -1058,10 +1058,13 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
 void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT
 {
     size_t offs = jl_field_offset(st, i);
+    if (rhs == NULL) { // TODO: this should be invalid, but it happens frequently in ircode.c
+        assert(jl_field_isptr(st, i) && *(jl_value_t**)((char*)v + offs) == NULL);
+        return;
+    }
     if (jl_field_isptr(st, i)) {
-        *(jl_value_t**)((char*)v + offs) = rhs;
-        if (rhs != NULL)
-            jl_gc_wb(v, rhs);
+        jl_atomic_store_relaxed((jl_value_t**)((char*)v + offs), rhs);
+        jl_gc_wb(v, rhs);
     }
     else {
         jl_value_t *ty = jl_field_type_concrete(st, i);
@@ -1085,7 +1088,8 @@ JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i)
     size_t offs = jl_field_offset(st, i);
     char *fld = (char*)v + offs;
     if (jl_field_isptr(st, i)) {
-        return *(jl_value_t**)fld != NULL;
+        jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)fld);
+        return fval != NULL;
     }
     jl_datatype_t *ft = (jl_datatype_t*)jl_field_type(st, i);
     if (jl_is_datatype(ft) && ft->layout->first_ptr >= 0) {
diff --git a/src/dump.c b/src/dump.c
index 68503642f1393e..2ce8e41d980296 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -1910,7 +1910,7 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
             while (codeinst) {
                 if (codeinst->min_world > 0)
                     codeinst->max_world = ~(size_t)0;
-                codeinst = codeinst->next;
+                codeinst = jl_atomic_load_relaxed(&codeinst->next);
             }
         }
         else {
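The gf.c hunks below all follow one publication protocol: jl_mi_cache_insert links a new code instance and publishes it with a release store, while lock-free readers walk the list through relaxed loads of codeinst->next. A self-contained C11 model of that pattern follows; names are illustrative, and the head load is written as acquire here for clarity, whereas the patch itself reads the head with a plain or relaxed load and leans on the writer-side lock and pointer dependency ordering.

#include <stdatomic.h>
#include <stddef.h>

typedef struct node {
    int min_world, max_world;  /* stand-ins for jl_code_instance_t fields */
    _Atomic(struct node *) next;
} node_t;

static _Atomic(node_t *) cache;

void cache_insert(node_t *n) /* writer, serialized by the method write lock */
{
    atomic_store_explicit(&n->next,
            atomic_load_explicit(&cache, memory_order_relaxed),
            memory_order_relaxed);
    /* release: a reader that sees `n` also sees its initialized fields */
    atomic_store_explicit(&cache, n, memory_order_release);
}

node_t *cache_find(int world) /* reader, no lock */
{
    node_t *c = atomic_load_explicit(&cache, memory_order_acquire);
    while (c) {
        if (c->min_world <= world && world <= c->max_world)
            return c;
        c = atomic_load_explicit(&c->next, memory_order_relaxed);
    }
    return NULL;
}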
diff --git a/src/gf.c b/src/gf.c
index 8f394099cd26d2..9dfb028e7a7707 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -341,7 +341,7 @@ JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *mi, size_t mi
             if (code && (code == jl_nothing || jl_ir_flag_inferred((jl_array_t*)code)))
                 return (jl_value_t*)codeinst;
         }
-        codeinst = codeinst->next;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     return (jl_value_t*)jl_nothing;
 }
@@ -358,7 +358,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
             jl_egal(codeinst->rettype, rettype)) {
             return codeinst;
         }
-        codeinst = codeinst->next;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     codeinst = jl_new_codeinst(
         mi, rettype, NULL, NULL,
@@ -407,7 +407,7 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN
     if (jl_is_method(mi->def.method))
         JL_LOCK(&mi->def.method->writelock);
     ci->next = mi->cache;
-    mi->cache = ci;
+    jl_atomic_store_release(&mi->cache, ci);
     jl_gc_wb(mi, ci);
     if (jl_is_method(mi->def.method))
         JL_UNLOCK(&mi->def.method->writelock);
@@ -1365,7 +1365,7 @@ static void invalidate_method_instance(jl_method_instance_t *replaced, size_t ma
             codeinst->max_world = max_world;
         }
         assert(codeinst->max_world <= max_world);
-        codeinst = codeinst->next;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     // recurse to all backedges to update their valid range also
     jl_array_t *backedges = replaced->backedges;
@@ -1811,7 +1811,7 @@ jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi, size_t world)
         if (codeinst->min_world <= world && world <= codeinst->max_world && codeinst->invoke != NULL) {
             return codeinst;
         }
-        codeinst = codeinst->next;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     return NULL;
 }
@@ -2103,7 +2103,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
             jl_value_t *res = codeinst->invoke(F, args, nargs, codeinst);
             return verify_type(res);
         }
-        codeinst = codeinst->next;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     int64_t last_alloc = jl_options.malloc_log ? jl_gc_diff_total_bytes() : 0;
     int last_errno = errno;
diff --git a/src/julia.h b/src/julia.h
index ccfcb3c096e25f..e30380dc9046b5 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -829,7 +829,9 @@ STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NO
 {
     assert(jl_typeis(t,jl_simplevector_type));
     assert(i < jl_svec_len(t));
-    return jl_svec_data(t)[i];
+    // while svec is supposedly immutable, in practice we sometimes publish it first
+    // and set the values lazily
+    return jl_atomic_load_relaxed(jl_svec_data(t) + i);
 }
 
 STATIC_INLINE jl_value_t *jl_svecset(
@@ -837,6 +839,8 @@ STATIC_INLINE jl_value_t *jl_svecset(
 {
     assert(jl_typeis(t,jl_simplevector_type));
     assert(i < jl_svec_len(t));
+    // TODO: while svec is supposedly immutable, in practice we sometimes publish it first
+    // and set the values lazily. Those users should be using jl_atomic_store_release here.
     jl_svec_data(t)[i] = (jl_value_t*)x;
     if (x) jl_gc_wb(t, x);
     return (jl_value_t*)x;
@@ -871,7 +875,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i)
 {
     assert(((jl_array_t*)a)->flags.ptrarray);
     assert(i < jl_array_len(a));
-    return ((jl_value_t**)(jl_array_data(a)))[i];
+    return jl_atomic_load_relaxed(((jl_value_t**)(jl_array_data(a))) + i);
 }
 
 STATIC_INLINE jl_value_t *jl_array_ptr_set(
@@ -879,7 +883,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set(
 {
     assert(((jl_array_t*)a)->flags.ptrarray);
     assert(i < jl_array_len(a));
-    ((jl_value_t**)(jl_array_data(a)))[i] = (jl_value_t*)x;
+    jl_atomic_store_relaxed(((jl_value_t**)(jl_array_data(a))) + i, (jl_value_t*)x);
     if (x) {
         if (((jl_array_t*)a)->flags.how == 3) {
             a = jl_array_data_owner(a);
diff --git a/src/precompile.c b/src/precompile.c
index 515379d7d0f7f0..ca0922f0e7bc9b 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -334,7 +334,7 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur
             jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
             return 1;
         }
-        codeinst = codeinst->next;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     return 1;
 }
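To close, a small stress harness for the property the whole patch is after: relaxed atomic slot accesses never yield torn pointers. This test is illustrative only and not part of the patch; it should run clean under -fsanitize=thread, whereas swapping the writer's atomic stores for a plain memcpy would reintroduce a data race that the sanitizer flags.

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define N 1024
static _Atomic(void *) slots[N]; /* zero-initialized: all slots start NULL */
static int token;                /* any non-NULL pointer target will do    */

static void *writer(void *arg)
{
    (void)arg;
    for (int round = 0; round < 10000; round++)
        for (int i = 0; i < N; i++)
            atomic_store_explicit(&slots[i], &token, memory_order_relaxed);
    return NULL;
}

static void *reader(void *arg)
{
    (void)arg;
    for (int round = 0; round < 10000; round++)
        for (int i = 0; i < N; i++) {
            void *v = atomic_load_explicit(&slots[i], memory_order_relaxed);
            assert(v == NULL || v == (void *)&token); /* never a torn pointer */
        }
    return NULL;
}

int main(void)
{
    pthread_t w, r;
    pthread_create(&w, NULL, writer, NULL);
    pthread_create(&r, NULL, reader, NULL);
    pthread_join(w, NULL);
    pthread_join(r, NULL);
    puts("ok: no torn pointers observed");
    return 0;
}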