Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lld] Sort code section chunks by range types on Arm64EC targets. #69099

Merged
merged 1 commit into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions lld/COFF/Chunks.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@
namespace lld::coff {

using llvm::COFF::ImportDirectoryTableEntry;
using llvm::object::COFFSymbolRef;
using llvm::object::SectionRef;
using llvm::object::chpe_range_type;
using llvm::object::coff_relocation;
using llvm::object::coff_section;
using llvm::object::COFFSymbolRef;
using llvm::object::SectionRef;

class Baserel;
class Defined;
Expand Down Expand Up @@ -114,6 +115,9 @@ class Chunk {
// synthesized by the linker.
bool isHotPatchable() const;

MachineTypes getMachine() const;
chpe_range_type getArm64ECRangeType() const;

protected:
Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}

Expand Down Expand Up @@ -164,6 +168,8 @@ class NonSectionChunk : public Chunk {
// Collect all locations that contain absolute addresses for base relocations.
virtual void getBaserels(std::vector<Baserel> *res) {}

virtual MachineTypes getMachine() const { return IMAGE_FILE_MACHINE_UNKNOWN; }

// Returns a human-readable name of this chunk. Chunks are unnamed chunks of
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }
Expand Down Expand Up @@ -420,6 +426,24 @@ inline StringRef Chunk::getDebugName() const {
return static_cast<const NonSectionChunk *>(this)->getDebugName();
}

inline MachineTypes Chunk::getMachine() const {
if (isa<SectionChunk>(this))
return static_cast<const SectionChunk *>(this)->getMachine();
else
return static_cast<const NonSectionChunk *>(this)->getMachine();
Comment on lines +432 to +433
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary else.

Suggested change
else
return static_cast<const NonSectionChunk *>(this)->getMachine();
return static_cast<const NonSectionChunk *>(this)->getMachine();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote it that way to be consistent with other similar functions in that file (just above the new one). Should such change be a separated NFC MR keeping them consistent?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, doing them as a follow up should be okay.

}

inline chpe_range_type Chunk::getArm64ECRangeType() const {
switch (getMachine()) {
case AMD64:
return chpe_range_type::Amd64;
case ARM64EC:
return chpe_range_type::Arm64EC;
default:
return chpe_range_type::Arm64;
}
}

// This class is used to implement an lld-specific feature (not implemented in
// MSVC) that minimizes the output size by finding string literals sharing tail
// parts and merging them.
Expand Down Expand Up @@ -506,6 +530,7 @@ class ImportThunkChunkX64 : public ImportThunkChunk {
explicit ImportThunkChunkX64(COFFLinkerContext &ctx, Defined *s);
size_t getSize() const override { return sizeof(importThunkX86); }
void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return AMD64; }
};

class ImportThunkChunkX86 : public ImportThunkChunk {
Expand All @@ -515,6 +540,7 @@ class ImportThunkChunkX86 : public ImportThunkChunk {
size_t getSize() const override { return sizeof(importThunkX86); }
void getBaserels(std::vector<Baserel> *res) override;
void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return I386; }
};

class ImportThunkChunkARM : public ImportThunkChunk {
Expand All @@ -526,6 +552,7 @@ class ImportThunkChunkARM : public ImportThunkChunk {
size_t getSize() const override { return sizeof(importThunkARM); }
void getBaserels(std::vector<Baserel> *res) override;
void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return ARMNT; }
};

class ImportThunkChunkARM64 : public ImportThunkChunk {
Expand All @@ -536,6 +563,7 @@ class ImportThunkChunkARM64 : public ImportThunkChunk {
}
size_t getSize() const override { return sizeof(importThunkARM64); }
void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return ARM64; }
};

class RangeExtensionThunkARM : public NonSectionChunk {
Expand All @@ -546,6 +574,7 @@ class RangeExtensionThunkARM : public NonSectionChunk {
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return ARMNT; }

Defined *target;

Expand All @@ -561,6 +590,7 @@ class RangeExtensionThunkARM64 : public NonSectionChunk {
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return ARM64; }

Defined *target;

Expand Down
8 changes: 8 additions & 0 deletions lld/COFF/DLL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ class ThunkChunkX64 : public NonSectionChunk {
ThunkChunkX64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {}

size_t getSize() const override { return sizeof(thunkX64); }
MachineTypes getMachine() const override { return AMD64; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkX64, sizeof(thunkX64));
Expand All @@ -334,6 +335,7 @@ class TailMergeChunkX64 : public NonSectionChunk {
TailMergeChunkX64(Chunk *d, Defined *h) : desc(d), helper(h) {}

size_t getSize() const override { return sizeof(tailMergeX64); }
MachineTypes getMachine() const override { return AMD64; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeX64, sizeof(tailMergeX64));
Expand Down Expand Up @@ -386,6 +388,7 @@ class ThunkChunkX86 : public NonSectionChunk {
: imp(i), tailMerge(tm), ctx(ctx) {}

size_t getSize() const override { return sizeof(thunkX86); }
MachineTypes getMachine() const override { return I386; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkX86, sizeof(thunkX86));
Expand All @@ -410,6 +413,7 @@ class TailMergeChunkX86 : public NonSectionChunk {
: desc(d), helper(h), ctx(ctx) {}

size_t getSize() const override { return sizeof(tailMergeX86); }
MachineTypes getMachine() const override { return I386; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeX86, sizeof(tailMergeX86));
Expand All @@ -436,6 +440,7 @@ class ThunkChunkARM : public NonSectionChunk {
}

size_t getSize() const override { return sizeof(thunkARM); }
MachineTypes getMachine() const override { return ARMNT; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkARM, sizeof(thunkARM));
Expand All @@ -462,6 +467,7 @@ class TailMergeChunkARM : public NonSectionChunk {
}

size_t getSize() const override { return sizeof(tailMergeARM); }
MachineTypes getMachine() const override { return ARMNT; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeARM, sizeof(tailMergeARM));
Expand All @@ -487,6 +493,7 @@ class ThunkChunkARM64 : public NonSectionChunk {
}

size_t getSize() const override { return sizeof(thunkARM64); }
MachineTypes getMachine() const override { return ARM64; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkARM64, sizeof(thunkARM64));
Expand All @@ -506,6 +513,7 @@ class TailMergeChunkARM64 : public NonSectionChunk {
}

size_t getSize() const override { return sizeof(tailMergeARM64); }
MachineTypes getMachine() const override { return ARM64; }

void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64));
Expand Down
17 changes: 17 additions & 0 deletions lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ class Writer {
void locateImportTables();
void createExportTable();
void mergeSections();
void sortECChunks();
void removeUnusedSections();
void assignAddresses();
bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin);
Expand Down Expand Up @@ -676,6 +677,7 @@ void Writer::run() {
createMiscChunks();
createExportTable();
mergeSections();
sortECChunks();
removeUnusedSections();
finalizeAddresses();
removeEmptySections();
Expand Down Expand Up @@ -1377,6 +1379,21 @@ void Writer::mergeSections() {
}
}

// EC targets may have chunks of various architectures mixed together at this
// point. Group code chunks of the same architecture together by sorting chunks
// by their EC range type.
void Writer::sortECChunks() {
if (!isArm64EC(ctx.config.machine))
return;

for (OutputSection *sec : ctx.outputSections) {
if (sec->isCodeSection())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right, here's a new call to the recently refactored function, sorry I didn't see it before. That makes the coupling much clearer.

llvm::stable_sort(sec->chunks, [=](const Chunk *a, const Chunk *b) {
return a->getArm64ECRangeType() < b->getArm64ECRangeType();
});
}
}

// Visits all sections to assign incremental, non-overlapping RVAs and
// file offsets.
void Writer::assignAddresses() {
Expand Down
97 changes: 97 additions & 0 deletions lld/test/COFF/Inputs/loadconfig-arm64ec.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
.section .rdata,"dr"
.globl _load_config_used
.p2align 3, 0
_load_config_used:
.word 0x140
.fill 0x54, 1, 0
.xword __security_cookie
.fill 0x10, 1, 0
.xword __guard_check_icall_fptr
.xword __guard_dispatch_icall_fptr
.xword __guard_fids_table
.xword __guard_fids_count
.xword __guard_flags
.xword 0
.xword __guard_iat_table
.xword __guard_iat_count
.xword __guard_longjmp_table
.xword __guard_longjmp_count
.xword 0
.xword __chpe_metadata
.fill 0x78, 1, 0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MS places the call helpers in the .cfg00 section, should that also be done here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that we will want to do that ultimately, but note that link.exe automatically merges .cfg00 section to .rdata, so it doesn't really change the output (it possibly changes the layout of .rdata itself). I think that we will want to do that merge in lld-link as well and then change the test.

__guard_check_icall_fptr:
.xword 0
__guard_dispatch_icall_fptr:
.xword 0
__os_arm64x_dispatch_call_no_redirect:
.xword 0
__os_arm64x_dispatch_ret:
.xword 0
__os_arm64x_check_call:
.xword 0
__os_arm64x_check_icall:
.xword 0
__os_arm64x_get_x64_information:
.xword 0
__os_arm64x_set_x64_information:
.xword 0
__os_arm64x_check_icall_cfg:
.xword 0
__os_arm64x_dispatch_fptr:
.xword 0
__os_arm64x_helper0:
.xword 0
__os_arm64x_helper1:
.xword 0
__os_arm64x_helper2:
.xword 0
__os_arm64x_helper3:
.xword 0
__os_arm64x_helper4:
.xword 0
__os_arm64x_helper5:
.xword 0
__os_arm64x_helper6:
.xword 0
__os_arm64x_helper7:
.xword 0
__os_arm64x_helper8:
.xword 0

.data
.globl __chpe_metadata
.p2align 3, 0
__chpe_metadata:
.word 1
.rva code_map
.word code_map_count
.word 0 // __x64_code_ranges_to_entry_points
.word 0 //__arm64x_redirection_metadata
.rva __os_arm64x_dispatch_call_no_redirect
.rva __os_arm64x_dispatch_ret
.rva __os_arm64x_check_call
.rva __os_arm64x_check_icall
.rva __os_arm64x_check_icall_cfg
.word 0 // __arm64x_native_entrypoint
.word 0 // __hybrid_auxiliary_iat
.word 0 // __x64_code_ranges_to_entry_points_count
.word 0 // __arm64x_redirection_metadata_count
.rva __os_arm64x_get_x64_information
.rva __os_arm64x_set_x64_information
.word 0 // __arm64x_extra_rfe_table
.word 0 // __arm64x_extra_rfe_table_size
.rva __os_arm64x_dispatch_fptr
.word 0 // __hybrid_auxiliary_iat_copy
.rva __os_arm64x_helper0
.rva __os_arm64x_helper1
.rva __os_arm64x_helper2
.rva __os_arm64x_helper3
.rva __os_arm64x_helper4
.rva __os_arm64x_helper5
.rva __os_arm64x_helper6
.rva __os_arm64x_helper7
.rva __os_arm64x_helper8

__security_cookie:
.xword 0
Loading