Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64: avoid memory barrier in InlinedMemmoveGCRefsHelper if dest is not on heap #102084

Merged
merged 9 commits into from
May 12, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/coreclr/classlibnative/bcltype/arraynative.inl
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,12 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
_ASSERTE(CheckPointer(dest));
_ASSERTE(CheckPointer(src));

GCHeapMemoryBarrier();
const bool notInHeap = ((BYTE*)dest < g_lowest_address || (BYTE*)dest >= g_highest_address);
EgorBo marked this conversation as resolved.
Show resolved Hide resolved

if (!notInHeap)
{
GCHeapMemoryBarrier();
}

// To be able to copy forwards, the destination buffer cannot start inside the source buffer
if ((size_t)dest - (size_t)src >= len)
Expand All @@ -319,7 +324,10 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
InlinedBackwardGCSafeCopyHelper(dest, src, len);
}

InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
if (!notInHeap)
{
InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
}
}

#endif // !_ARRAYNATIVE_INL_
23 changes: 17 additions & 6 deletions src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,28 @@ FCIMPLEND

FCIMPL3(void, RhBulkMoveWithWriteBarrier, uint8_t* pDest, uint8_t* pSrc, size_t cbDest)
{
// It is possible that the bulk write is publishing object references accessible so far only
// by the current thread to shared memory.
// The memory model requires that writes performed by current thread are observable no later
// than the writes that will actually publish the references.
GCHeapMemoryBarrier();
ASSERT(cbDest > 0);
ASSERT(pDest != pSrc);

const bool notInHeap = pDest < g_lowest_address || pDest >= g_highest_address;

if (!notInHeap)
{
// It is possible that the bulk write is publishing object references accessible so far only
// by the current thread to shared memory.
// The memory model requires that writes performed by current thread are observable no later
// than the writes that will actually publish the references.
GCHeapMemoryBarrier();
}

if (pDest <= pSrc || pSrc + cbDest <= pDest)
InlineForwardGCSafeCopy(pDest, pSrc, cbDest);
else
InlineBackwardGCSafeCopy(pDest, pSrc, cbDest);

InlinedBulkWriteBarrier(pDest, cbDest);
if (!notInHeap)
{
InlinedBulkWriteBarrier(pDest, cbDest);
}
}
FCIMPLEND
11 changes: 3 additions & 8 deletions src/coreclr/nativeaot/Runtime/GCMemoryHelpers.inl
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,9 @@ FORCEINLINE void InlineCheckedWriteBarrier(void * dst, void * ref)

FORCEINLINE void InlinedBulkWriteBarrier(void* pMemStart, size_t cbMemSize)
{
// Check whether the writes were even into the heap. If not there's no card update required.
// Also if the size is smaller than a pointer, no write barrier is required.
// This case can occur with universal shared generic code where the size
// is not known at compile time.
if (pMemStart < g_lowest_address || (pMemStart >= g_highest_address) || (cbMemSize < sizeof(uintptr_t)))
{
return;
}
// Caller is expected to check whether the writes were even into the heap
ASSERT(cbMemSize >= sizeof(uintptr_t));
ASSERT((pMemStart >= g_lowest_address) && (pMemStart < g_highest_address));

#ifdef WRITE_BARRIER_CHECK
// Perform shadow heap updates corresponding to the gc heap updates that immediately preceded this helper
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/vm/comutilnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,8 +525,9 @@ FCIMPL3(VOID, Buffer::BulkMoveWithWriteBarrier, void *dst, void *src, size_t byt
{
FCALL_CONTRACT;

if (dst != src && byteCount != 0)
InlinedMemmoveGCRefsHelper(dst, src, byteCount);
_ASSERTE(dst != src);
_ASSERTE(byteCount != 0);
InlinedMemmoveGCRefsHelper(dst, src, byteCount);

FC_GC_POLL();
}
Expand Down
37 changes: 1 addition & 36 deletions src/coreclr/vm/gchelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1484,39 +1484,4 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref)
}
}
}
}

//----------------------------------------------------------------------------
//
// Write Barrier Support for bulk copy ("Clone") operations
//
// StartPoint is the target bulk copy start point
// len is the length of the bulk copy (in bytes)
//
//
// Performance Note:
//
// This is implemented somewhat "conservatively", that is we
// assume that all the contents of the bulk copy are object
// references. If they are not, and the value lies in the
// ephemeral range, we will set false positives in the card table.
//
// We could use the pointer maps and do this more accurately if necessary

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("y", on) // Small critical routines, don't put in EBP frame
#endif //_MSC_VER && TARGET_X86

void
SetCardsAfterBulkCopy(Object **start, size_t len)
{
// If the size is smaller than a pointer, no write barrier is required.
if (len >= sizeof(uintptr_t))
{
InlinedSetCardsAfterBulkCopyHelper(start, len);
}
}

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("", on) // Go back to command line default optimizations
#endif //_MSC_VER && TARGET_X86
}
1 change: 0 additions & 1 deletion src/coreclr/vm/gchelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ extern void ThrowOutOfMemoryDimensionsExceeded();
//========================================================================

void ErectWriteBarrier(OBJECTREF* dst, OBJECTREF ref);
void SetCardsAfterBulkCopy(Object **start, size_t len);

void PublishFrozenObject(Object*& orObject);

Expand Down
8 changes: 2 additions & 6 deletions src/coreclr/vm/gchelpers.inl
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,9 @@

FORCEINLINE void InlinedSetCardsAfterBulkCopyHelper(Object **start, size_t len)
{
// Check whether the writes were even into the heap. If not there's no card update required.
// Also if the size is smaller than a pointer, no write barrier is required.
// Caller is expected to check whether the writes were even into the heap
_ASSERTE(len >= sizeof(uintptr_t));
if ((BYTE*)start < g_lowest_address || (BYTE*)start >= g_highest_address)
{
return;
}
_ASSERTE(((BYTE*)start >= g_lowest_address) && ((BYTE*)start < g_highest_address));

// Don't optimize the Generation 0 case if we are checking for write barrier violations
// since we need to update the shadow heap even in the generation 0 case.
Expand Down
7 changes: 4 additions & 3 deletions src/libraries/System.Private.CoreLib/src/System/Buffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ ref Unsafe.As<T, byte>(ref source),
#endif
internal static void BulkMoveWithWriteBarrier(ref byte destination, ref byte source, nuint byteCount)
{
if (byteCount == 0 || Unsafe.AreSame(ref destination, ref source))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method seems to be structured to allow inlining of BulkMoveWithWriteBarrier at every callsite.

I think it would be better to keep these checks inside the non-inlined portion of the helpers. The empty buffer or same buffer are rare cases. We do not need to optimize for them by inlining these checks at every callsite.

return;

if (byteCount <= BulkMoveWithWriteBarrierChunk)
__BulkMoveWithWriteBarrier(ref destination, ref source, byteCount);
else
Expand All @@ -193,9 +196,7 @@ internal static void BulkMoveWithWriteBarrier(ref byte destination, ref byte sou
private static void _BulkMoveWithWriteBarrier(ref byte destination, ref byte source, nuint byteCount)
{
Debug.Assert(byteCount > BulkMoveWithWriteBarrierChunk);

if (Unsafe.AreSame(ref source, ref destination))
return;
Debug.Assert(!Unsafe.AreSame(ref source, ref destination));

// This is equivalent to: (destination - source) >= byteCount || (destination - source) < 0
jkotas marked this conversation as resolved.
Show resolved Hide resolved
if ((nuint)(nint)Unsafe.ByteOffset(ref source, ref destination) >= byteCount)
Expand Down
Loading