Skip to content

Commit

Permalink
ARM64: avoid memory barrier in InlinedMemmoveGCRefsHelper if dest is not on heap (dotnet#102084)
Browse files Browse the repository at this point in the history

* don't put full barrier if dest is not on the heap

* Address feedback

---------

Co-authored-by: Jan Kotas <[email protected]>
  • Loading branch information
2 people authored and Ruihan-Yin committed May 30, 2024
1 parent e1bafc7 commit 92f314c
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 59 deletions.
12 changes: 10 additions & 2 deletions src/coreclr/classlibnative/bcltype/arraynative.inl
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,12 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
_ASSERTE(CheckPointer(dest));
_ASSERTE(CheckPointer(src));

GCHeapMemoryBarrier();
const bool notInHeap = ((BYTE*)dest < g_lowest_address || (BYTE*)dest >= g_highest_address);

if (!notInHeap)
{
GCHeapMemoryBarrier();
}

// To be able to copy forwards, the destination buffer cannot start inside the source buffer
if ((size_t)dest - (size_t)src >= len)
Expand All @@ -319,7 +324,10 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
InlinedBackwardGCSafeCopyHelper(dest, src, len);
}

InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
if (!notInHeap)
{
InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
}
}

#endif // !_ARRAYNATIVE_INL_
23 changes: 17 additions & 6 deletions src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,28 @@ FCIMPLEND

FCIMPL3(void, RhBulkMoveWithWriteBarrier, uint8_t* pDest, uint8_t* pSrc, size_t cbDest)
{
    // Nothing to do for an empty or self-copy. This also guarantees that when we
    // reach InlinedBulkWriteBarrier below, cbDest is a nonzero multiple of the
    // pointer size (GC refs are pointer-sized), satisfying its size assertion.
    if (cbDest == 0 || pDest == pSrc)
        return;

    // If the destination is outside the GC heap range, no object references are
    // being published to the heap: neither the memory barrier nor the card-table
    // update is required.
    const bool notInHeap = pDest < g_lowest_address || pDest >= g_highest_address;

    if (!notInHeap)
    {
        // It is possible that the bulk write is publishing object references accessible so far only
        // by the current thread to shared memory.
        // The memory model requires that writes performed by current thread are observable no later
        // than the writes that will actually publish the references.
        GCHeapMemoryBarrier();
    }

    // Copy forward when the ranges do not overlap in a way that would clobber
    // unread source bytes; otherwise copy backward.
    if (pDest <= pSrc || pSrc + cbDest <= pDest)
        InlineForwardGCSafeCopy(pDest, pSrc, cbDest);
    else
        InlineBackwardGCSafeCopy(pDest, pSrc, cbDest);

    if (!notInHeap)
    {
        // Mark the card table for the destination range so the GC scans it for
        // cross-generation references.
        InlinedBulkWriteBarrier(pDest, cbDest);
    }
}
FCIMPLEND
11 changes: 3 additions & 8 deletions src/coreclr/nativeaot/Runtime/GCMemoryHelpers.inl
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,9 @@ FORCEINLINE void InlineCheckedWriteBarrier(void * dst, void * ref)

FORCEINLINE void InlinedBulkWriteBarrier(void* pMemStart, size_t cbMemSize)
{
// Check whether the writes were even into the heap. If not there's no card update required.
// Also if the size is smaller than a pointer, no write barrier is required.
// This case can occur with universal shared generic code where the size
// is not known at compile time.
if (pMemStart < g_lowest_address || (pMemStart >= g_highest_address) || (cbMemSize < sizeof(uintptr_t)))
{
return;
}
// Caller is expected to check whether the writes were even into the heap
ASSERT(cbMemSize >= sizeof(uintptr_t));
ASSERT((pMemStart >= g_lowest_address) && (pMemStart < g_highest_address));

#ifdef WRITE_BARRIER_CHECK
// Perform shadow heap updates corresponding to the gc heap updates that immediately preceded this helper
Expand Down
37 changes: 1 addition & 36 deletions src/coreclr/vm/gchelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1484,39 +1484,4 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref)
}
}
}
}

//----------------------------------------------------------------------------
//
// Write Barrier Support for bulk copy ("Clone") operations
//
// StartPoint is the target bulk copy start point
// len is the length of the bulk copy (in bytes)
//
//
// Performance Note:
//
// This is implemented somewhat "conservatively", that is we
// assume that all the contents of the bulk copy are object
// references. If they are not, and the value lies in the
// ephemeral range, we will set false positives in the card table.
//
// We could use the pointer maps and do this more accurately if necessary

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("y", on) // Small critical routines, don't put in EBP frame
#endif //_MSC_VER && TARGET_X86

void
SetCardsAfterBulkCopy(Object **start, size_t len)
{
// If the size is smaller than a pointer, no write barrier is required.
if (len >= sizeof(uintptr_t))
{
InlinedSetCardsAfterBulkCopyHelper(start, len);
}
}

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("", on) // Go back to command line default optimizations
#endif //_MSC_VER && TARGET_X86
}
1 change: 0 additions & 1 deletion src/coreclr/vm/gchelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ extern void ThrowOutOfMemoryDimensionsExceeded();
//========================================================================

void ErectWriteBarrier(OBJECTREF* dst, OBJECTREF ref);
void SetCardsAfterBulkCopy(Object **start, size_t len);

void PublishFrozenObject(Object*& orObject);

Expand Down
8 changes: 2 additions & 6 deletions src/coreclr/vm/gchelpers.inl
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,9 @@

FORCEINLINE void InlinedSetCardsAfterBulkCopyHelper(Object **start, size_t len)
{
// Check whether the writes were even into the heap. If not there's no card update required.
// Also if the size is smaller than a pointer, no write barrier is required.
// Caller is expected to check whether the writes were even into the heap
_ASSERTE(len >= sizeof(uintptr_t));
if ((BYTE*)start < g_lowest_address || (BYTE*)start >= g_highest_address)
{
return;
}
_ASSERTE(((BYTE*)start >= g_lowest_address) && ((BYTE*)start < g_highest_address));

// Don't optimize the Generation 0 case if we are checking for write barrier violations
// since we need to update the shadow heap even in the generation 0 case.
Expand Down
3 changes: 3 additions & 0 deletions src/mono/mono/metadata/icall.c
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,9 @@ ves_icall_System_Runtime_RuntimeImports_Memmove (guint8 *destination, guint8 *so
void
ves_icall_System_Buffer_BulkMoveWithWriteBarrier (guint8 *destination, guint8 *source, size_t len, MonoType *type)
{
if (len == 0 || destination == source)
return;

if (MONO_TYPE_IS_REFERENCE (type))
mono_gc_wbarrier_arrayref_copy_internal (destination, source, (guint)len);
else
Expand Down

0 comments on commit 92f314c

Please sign in to comment.