[lld] Align EC code region boundaries. #69100
Conversation
@llvm/pr-subscribers-platform-windows @llvm/pr-subscribers-lld

Author: Jacek Caban (cjacek)

Changes

This is a refreshed version of https://reviews.llvm.org/D157142 and depends on #69099.

Boundaries between code chunks of different architectures are always aligned. The 0x1000 alignment appears to be a constant; it does not seem to be affected by any command-line alignment argument. More details about my findings are here: https://wiki.winehq.org/ARM64ECToolchain#Code_layout

This is compatible with MSVC, and I suspect it is motivated by the emulator needing to watch for writes to x86_64 code in order to invalidate JIT code.

cc @bylaws

Patch is 22.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69100.diff

8 Files Affected:
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index e17b64df869fe88..4e845afa8947a5f 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -437,7 +437,7 @@ void SectionChunk::applyRelocation(uint8_t *off,
// Compute the RVA of the relocation for relative relocations.
uint64_t p = rva + rel.VirtualAddress;
uint64_t imageBase = file->ctx.config.imageBase;
- switch (file->ctx.config.machine) {
+ switch (getMachine()) {
case AMD64:
applyRelX64(off, rel.Type, os, s, p, imageBase);
break;
@@ -551,7 +551,7 @@ static uint8_t getBaserelType(const coff_relocation &rel,
// Only called when base relocation is enabled.
void SectionChunk::getBaserels(std::vector<Baserel> *res) {
for (const coff_relocation &rel : getRelocs()) {
- uint8_t ty = getBaserelType(rel, file->ctx.config.machine);
+ uint8_t ty = getBaserelType(rel, getMachine());
if (ty == IMAGE_REL_BASED_ABSOLUTE)
continue;
Symbol *target = file->getSymbol(rel.SymbolTableIndex);
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 3d605e6ab10c8c8..4e500cafd3ce4a2 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -24,10 +24,11 @@
namespace lld::coff {
using llvm::COFF::ImportDirectoryTableEntry;
-using llvm::object::COFFSymbolRef;
-using llvm::object::SectionRef;
+using llvm::object::chpe_range_type;
using llvm::object::coff_relocation;
using llvm::object::coff_section;
+using llvm::object::COFFSymbolRef;
+using llvm::object::SectionRef;
class Baserel;
class Defined;
@@ -114,6 +115,9 @@ class Chunk {
// synthesized by the linker.
bool isHotPatchable() const;
+ MachineTypes getMachine() const;
+ chpe_range_type getArm64ECRangeType() const;
+
protected:
Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
@@ -164,6 +168,8 @@ class NonSectionChunk : public Chunk {
// Collect all locations that contain absolute addresses for base relocations.
virtual void getBaserels(std::vector<Baserel> *res) {}
+ virtual MachineTypes getMachine() const { return IMAGE_FILE_MACHINE_UNKNOWN; }
+
// Returns a human-readable name of this chunk. Chunks are unnamed chunks of
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }
@@ -219,6 +225,8 @@ class SectionChunk final : public Chunk {
ArrayRef<uint8_t> getContents() const;
void writeTo(uint8_t *buf) const;
+ MachineTypes getMachine() const { return file->getMachineType(); }
+
// Defend against unsorted relocations. This may be overly conservative.
void sortRelocations();
@@ -418,6 +426,24 @@ inline StringRef Chunk::getDebugName() const {
return static_cast<const NonSectionChunk *>(this)->getDebugName();
}
+inline MachineTypes Chunk::getMachine() const {
+ if (isa<SectionChunk>(this))
+ return static_cast<const SectionChunk *>(this)->getMachine();
+ else
+ return static_cast<const NonSectionChunk *>(this)->getMachine();
+}
+
+inline chpe_range_type Chunk::getArm64ECRangeType() const {
+ switch (getMachine()) {
+ case AMD64:
+ return chpe_range_type::Amd64;
+ case ARM64EC:
+ return chpe_range_type::Arm64EC;
+ default:
+ return chpe_range_type::Arm64;
+ }
+}
+
// This class is used to implement an lld-specific feature (not implemented in
// MSVC) that minimizes the output size by finding string literals sharing tail
// parts and merging them.
@@ -504,6 +530,7 @@ class ImportThunkChunkX64 : public ImportThunkChunk {
explicit ImportThunkChunkX64(COFFLinkerContext &ctx, Defined *s);
size_t getSize() const override { return sizeof(importThunkX86); }
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return AMD64; }
};
class ImportThunkChunkX86 : public ImportThunkChunk {
@@ -513,6 +540,7 @@ class ImportThunkChunkX86 : public ImportThunkChunk {
size_t getSize() const override { return sizeof(importThunkX86); }
void getBaserels(std::vector<Baserel> *res) override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return I386; }
};
class ImportThunkChunkARM : public ImportThunkChunk {
@@ -524,6 +552,7 @@ class ImportThunkChunkARM : public ImportThunkChunk {
size_t getSize() const override { return sizeof(importThunkARM); }
void getBaserels(std::vector<Baserel> *res) override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARMNT; }
};
class ImportThunkChunkARM64 : public ImportThunkChunk {
@@ -534,6 +563,7 @@ class ImportThunkChunkARM64 : public ImportThunkChunk {
}
size_t getSize() const override { return sizeof(importThunkARM64); }
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARM64; }
};
class RangeExtensionThunkARM : public NonSectionChunk {
@@ -544,6 +574,7 @@ class RangeExtensionThunkARM : public NonSectionChunk {
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARMNT; }
Defined *target;
@@ -559,6 +590,7 @@ class RangeExtensionThunkARM64 : public NonSectionChunk {
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARM64; }
Defined *target;
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 5977970104672c8..0b337a209c377db 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -318,6 +318,7 @@ class ThunkChunkX64 : public NonSectionChunk {
ThunkChunkX64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {}
size_t getSize() const override { return sizeof(thunkX64); }
+ MachineTypes getMachine() const override { return AMD64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkX64, sizeof(thunkX64));
@@ -334,6 +335,7 @@ class TailMergeChunkX64 : public NonSectionChunk {
TailMergeChunkX64(Chunk *d, Defined *h) : desc(d), helper(h) {}
size_t getSize() const override { return sizeof(tailMergeX64); }
+ MachineTypes getMachine() const override { return AMD64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeX64, sizeof(tailMergeX64));
@@ -386,6 +388,7 @@ class ThunkChunkX86 : public NonSectionChunk {
: imp(i), tailMerge(tm), ctx(ctx) {}
size_t getSize() const override { return sizeof(thunkX86); }
+ MachineTypes getMachine() const override { return I386; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkX86, sizeof(thunkX86));
@@ -410,6 +413,7 @@ class TailMergeChunkX86 : public NonSectionChunk {
: desc(d), helper(h), ctx(ctx) {}
size_t getSize() const override { return sizeof(tailMergeX86); }
+ MachineTypes getMachine() const override { return I386; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeX86, sizeof(tailMergeX86));
@@ -436,6 +440,7 @@ class ThunkChunkARM : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(thunkARM); }
+ MachineTypes getMachine() const override { return ARMNT; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkARM, sizeof(thunkARM));
@@ -462,6 +467,7 @@ class TailMergeChunkARM : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(tailMergeARM); }
+ MachineTypes getMachine() const override { return ARMNT; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeARM, sizeof(tailMergeARM));
@@ -487,6 +493,7 @@ class ThunkChunkARM64 : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(thunkARM64); }
+ MachineTypes getMachine() const override { return ARM64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkARM64, sizeof(thunkARM64));
@@ -506,6 +513,7 @@ class TailMergeChunkARM64 : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(tailMergeARM64); }
+ MachineTypes getMachine() const override { return ARM64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64));
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 4f6c2a57f533505..d215fc8935a6123 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -212,6 +212,7 @@ class Writer {
void locateImportTables();
void createExportTable();
void mergeSections();
+ void sortECChunks();
void removeUnusedSections();
void assignAddresses();
bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin);
@@ -676,6 +677,7 @@ void Writer::run() {
createMiscChunks();
createExportTable();
mergeSections();
+ sortECChunks();
removeUnusedSections();
finalizeAddresses();
removeEmptySections();
@@ -1377,6 +1379,21 @@ void Writer::mergeSections() {
}
}
+// EC targets may have chunks of various architectures mixed together at this
+// point. Group code chunks of the same architecture together by sorting chunks
+// by their EC range type.
+void Writer::sortECChunks() {
+ if (!isArm64EC(ctx.config.machine))
+ return;
+
+ for (OutputSection *sec : ctx.outputSections) {
+ if (sec->isCodeSection())
+ llvm::stable_sort(sec->chunks, [=](const Chunk *a, const Chunk *b) {
+ return a->getArm64ECRangeType() < b->getArm64ECRangeType();
+ });
+ }
+}
+
// Visits all sections to assign incremental, non-overlapping RVAs and
// file offsets.
void Writer::assignAddresses() {
@@ -1403,13 +1420,20 @@ void Writer::assignAddresses() {
// If /FUNCTIONPADMIN is used, functions are padded in order to create a
// hotpatchable image.
- const bool isCodeSection =
- (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) &&
- (sec->header.Characteristics & IMAGE_SCN_MEM_READ) &&
- (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE);
- uint32_t padding = isCodeSection ? config->functionPadMin : 0;
+ uint32_t padding = sec->isCodeSection() ? config->functionPadMin : 0;
+ MachineTypes prevECMachine = IMAGE_FILE_MACHINE_UNKNOWN;
for (Chunk *c : sec->chunks) {
+ if (isArm64EC(ctx.config.machine) && sec->isCodeSection()) {
+ MachineTypes machine = c->getMachine();
+ if (machine != IMAGE_FILE_MACHINE_UNKNOWN) {
+ // We need additional alignment when crossing EC range boundaries.
+ if (prevECMachine != IMAGE_FILE_MACHINE_UNKNOWN &&
+ machine != prevECMachine)
+ virtualSize = alignTo(virtualSize, 4096);
+ prevECMachine = machine;
+ }
+ }
if (padding && c->isHotPatchable())
virtualSize += padding;
virtualSize = alignTo(virtualSize, c->getAlignment());
diff --git a/lld/COFF/Writer.h b/lld/COFF/Writer.h
index 4a74aa7ada59d73..9004bb310d07305 100644
--- a/lld/COFF/Writer.h
+++ b/lld/COFF/Writer.h
@@ -64,6 +64,12 @@ class OutputSection {
// Used only when the name is longer than 8 bytes.
void setStringTableOff(uint32_t v) { stringTableOff = v; }
+ bool isCodeSection() const {
+ return (header.Characteristics & llvm::COFF::IMAGE_SCN_CNT_CODE) &&
+ (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ) &&
+ (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_EXECUTE);
+ }
+
// N.B. The section index is one based.
uint32_t sectionIndex = 0;
diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
new file mode 100644
index 000000000000000..1efd02406ca691b
--- /dev/null
+++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
@@ -0,0 +1,97 @@
+ .section .rdata,"dr"
+ .globl _load_config_used
+ .p2align 3, 0
+_load_config_used:
+ .word 0x140
+ .fill 0x54, 1, 0
+ .xword __security_cookie
+ .fill 0x10, 1, 0
+ .xword __guard_check_icall_fptr
+ .xword __guard_dispatch_icall_fptr
+ .xword __guard_fids_table
+ .xword __guard_fids_count
+ .xword __guard_flags
+ .xword 0
+ .xword __guard_iat_table
+ .xword __guard_iat_count
+ .xword __guard_longjmp_table
+ .xword __guard_longjmp_count
+ .xword 0
+ .xword __chpe_metadata
+ .fill 0x78, 1, 0
+
+__guard_check_icall_fptr:
+ .xword 0
+__guard_dispatch_icall_fptr:
+ .xword 0
+__os_arm64x_dispatch_call_no_redirect:
+ .xword 0
+__os_arm64x_dispatch_ret:
+ .xword 0
+__os_arm64x_check_call:
+ .xword 0
+__os_arm64x_check_icall:
+ .xword 0
+__os_arm64x_get_x64_information:
+ .xword 0
+__os_arm64x_set_x64_information:
+ .xword 0
+__os_arm64x_check_icall_cfg:
+ .xword 0
+__os_arm64x_dispatch_fptr:
+ .xword 0
+__os_arm64x_helper0:
+ .xword 0
+__os_arm64x_helper1:
+ .xword 0
+__os_arm64x_helper2:
+ .xword 0
+__os_arm64x_helper3:
+ .xword 0
+__os_arm64x_helper4:
+ .xword 0
+__os_arm64x_helper5:
+ .xword 0
+__os_arm64x_helper6:
+ .xword 0
+__os_arm64x_helper7:
+ .xword 0
+__os_arm64x_helper8:
+ .xword 0
+
+ .data
+ .globl __chpe_metadata
+ .p2align 3, 0
+__chpe_metadata:
+ .word 1
+ .rva code_map
+ .word code_map_count
+ .word 0 // __x64_code_ranges_to_entry_points
+ .word 0 //__arm64x_redirection_metadata
+ .rva __os_arm64x_dispatch_call_no_redirect
+ .rva __os_arm64x_dispatch_ret
+ .rva __os_arm64x_check_call
+ .rva __os_arm64x_check_icall
+ .rva __os_arm64x_check_icall_cfg
+ .word 0 // __arm64x_native_entrypoint
+ .word 0 // __hybrid_auxiliary_iat
+ .word 0 // __x64_code_ranges_to_entry_points_count
+ .word 0 // __arm64x_redirection_metadata_count
+ .rva __os_arm64x_get_x64_information
+ .rva __os_arm64x_set_x64_information
+ .word 0 // __arm64x_extra_rfe_table
+ .word 0 // __arm64x_extra_rfe_table_size
+ .rva __os_arm64x_dispatch_fptr
+ .word 0 // __hybrid_auxiliary_iat_copy
+ .rva __os_arm64x_helper0
+ .rva __os_arm64x_helper1
+ .rva __os_arm64x_helper2
+ .rva __os_arm64x_helper3
+ .rva __os_arm64x_helper4
+ .rva __os_arm64x_helper5
+ .rva __os_arm64x_helper6
+ .rva __os_arm64x_helper7
+ .rva __os_arm64x_helper8
+
+__security_cookie:
+ .xword 0
diff --git a/lld/test/COFF/arm64ec-codemap.test b/lld/test/COFF/arm64ec-codemap.test
new file mode 100644
index 000000000000000..0ee39b79f1712fc
--- /dev/null
+++ b/lld/test/COFF/arm64ec-codemap.test
@@ -0,0 +1,195 @@
+REQUIRES: aarch64, x86
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-mc -filetype=obj -triple=arm64-windows arm64-func-sym.s -o arm64-func-sym.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-func-sym.s -o arm64ec-func-sym.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-func-sym.s -o x86_64-func-sym.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows codemap.s -o codemap.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows codemap2.s -o codemap2.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows codemap3.s -o codemap3.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+
+Link ARM64EC DLL and verify that the code is arranged as expected.
+
+RUN: lld-link -out:test.dll -machine:arm64ec arm64ec-func-sym.obj x86_64-func-sym.obj \
+RUN: codemap.obj loadconfig-arm64ec.obj -dll -noentry
+
+RUN: llvm-readobj --coff-load-config test.dll | FileCheck -check-prefix=CODEMAP %s
+CODEMAP: CodeMap [
+CODEMAP-NEXT: 0x1000 - 0x1008 ARM64EC
+CODEMAP-NEXT: 0x2000 - 0x2006 X64
+CODEMAP-NEXT: 0x5000 - 0x5008 ARM64EC
+CODEMAP-NEXT: 0x6000 - 0x6006 X64
+CODEMAP-NEXT: ]
+
+RUN: llvm-objdump -d test.dll | FileCheck -check-prefix=DISASM %s
+DISASM: Disassembly of section .text:
+DISASM-EMPTY:
+DISASM-NEXT: 0000000180001000 <.text>:
+DISASM-NEXT: 180001000: 52800040 mov w0, #0x2
+DISASM-NEXT: 180001004: d65f03c0 ret
+DISASM-NEXT: ...
+DISASM-NEXT: 180002000: b8 03 00 00 00 movl $0x3, %eax
+DISASM-NEXT: 180002005: c3 retq
+DISASM-EMPTY:
+DISASM-NEXT: Disassembly of section test:
+DISASM-EMPTY:
+DISASM-NEXT: 0000000180005000 <test>:
+DISASM-NEXT: 180005000: 528000a0 mov w0, #0x5
+DISASM-NEXT: 180005004: d65f03c0 ret
+DISASM-NEXT: ...
+DISASM-NEXT: 180006000: b8 06 00 00 00 movl $0x6, %eax
+DISASM-NEXT: 180006005: c3 retq
+
+The order of arguments doesn't matter in this case; chunks are sorted by target type anyway.
+
+RUN: lld-link -out:test2.dll -machine:arm64ec x86_64-func-sym.obj arm64ec-func-sym.obj \
+RUN: codemap.obj loadconfig-arm64ec.obj -dll -noentry
+RUN: llvm-readobj --coff-load-config test2.dll | FileCheck -check-prefix=CODEMAP %s
+RUN: llvm-objdump -d test2.dll | FileCheck -check-prefix=DISASM %s
+
+RUN: lld-link -out:testx.dll -machine:arm64x arm64-func-sym.obj arm64ec-func-sym.obj \
+RUN: x86_64-func-sym.obj codemap2.obj loadconfig-arm64ec.obj -dll -noentry
+
+Do the same with ARM64X target.
+
+RUN: llvm-readobj --coff-load-config testx.dll | FileCheck -check-prefix=CODEMAPX %s
+CODEMAPX: CodeMap [
+CODEMAPX-NEXT: 0x1000 - 0x1008 ARM64
+CODEMAPX-NEXT: 0x2000 - 0x2008 ARM64EC
+CODEMAPX-NEXT: 0x3000 - 0x3006 X64
+CODEMAPX-NEXT: 0x6000 - 0x6008 ARM64EC
+CODEMAPX-NEXT: 0x7000 - 0x7006 X64
+CODEMAPX-NEXT: ]
+
+RUN: llvm-objdump -d testx.dll | FileCheck -check-prefix=DISASMX %s
+DISASMX: Disassembly of section .text:
+DISASMX-EMPTY:
+DISASMX-NEXT: 0000000180001000 <.text>:
+DISASMX-NEXT: 180001000: 528000e0 mov w0, #0x7
+DISASMX-NEXT: 180001004: d65f03c0 ret
+DISASMX-NEXT: ...
+DISASMX-NEXT: 180002000: 52800040 mov w0, #0x2
+DISASMX-NEXT: 180002004: d65f03c0 ret
+DISASMX-NEXT: ...
+DISASMX-NEXT: 180003000: b8 03 00 00 00 movl $0x3, %eax
+DISASMX-NEXT: 180003005: c3 retq
+DISASMX-EMPTY:
+DISASMX-NEXT: Disassembly of section test:
+DISASMX-EMPTY:
+DISASMX-NEXT: 0000000180006000 <test>:
+DISASMX-NEXT: 180006000: 528000a0 mov w0, #0x5
+DISASMX-NEXT: 180006004: d65f03c0 ret
+DISASMX-NEXT: ...
+DISASMX-NEXT: 180007000: b8 06 00 00 00 movl $0x6, %eax
+DISASMX-NEXT: 180007005: c3 retq
+
+Test merged sections.
+
+RUN: lld-link -out:testm.dll -machine:arm64ec arm64ec-func-sym.obj x86_64-func-sym.obj \
+RUN: codemap3.obj loadconfig-arm64ec.obj -dll -noentry -merge:test=.text
+
+RUN: llvm-readobj --coff-load-config testm.dll | FileCheck -check-prefix=CODEMAPM %s
+CODEMAPM: CodeMap [
+CODEMAPM-NEXT: 0x1000 - 0x1010 ARM64EC
+CODEMAPM-NEXT: 0x2000 - 0x200E X64
+CODEMAPM-NEXT: ]
+
+RUN: llvm-objdump -d testm.dll | FileCheck -check-prefix=DISASMM %s
+DISASMM: Disassembly of section .text:
+DISASMM-EMPTY:
+DISASMM-NEXT: 0000000180001000 <.text>:
+DISASMM-NEXT: 180001000: 52800040 mov w0, #0x2
+DISASMM-NEXT: 180001004: d65f03c0 ret
+DISASMM-NEXT: 180001008: 528000a0 mov w0, #0x5
+DISASMM-NEXT: 18000100c: d65f03c0 ret
+DISASMM-NEXT: ...
+DISASMM-NEXT: 180002000: b8 03 00 00 00 movl $0x3, %eax
+DISASMM-NEXT: 180002005: c3 retq
+DISASMM-NEXT: 180002006: 00 00 addb %al, (%rax)
+DISASMM-NEXT: 180002008: b8 06 00 00 00 movl $0x6, %eax
+DISASMM-NEXT: 18000200d: c3 retq
+
+#--- arm64-func-sym.s
+ .text
+ .globl arm64_func_sym
+ .p2align 2, 0x0
+arm64_func_sym:
+ mov w0, #7
+ ret
+
+#--- arm64ec-func-sym.s
+ .text
+ .globl arm64ec_func_sym
+ .p2align 2, 0x0
+arm64ec_func_sym:
+ mov w0, #2
+ ret
+
+ .section test, "xr"
+ .globl arm64ec_func_sym2
+ .p2align 2, 0x0
+arm64ec_func_sym2:
+ mov w0, #5
+ ret
+
+#--- x86_64-func-sym.s
+ .text
+ .globl x86_64_func_sym
+ .p2align 2, 0x0
+x86_64_func_sym:
+ movl $3, %eax
+ retq
+
+ .section test, "xr"
+ .globl x86_64_func_sym2
+ .p2align 2, 0x0
+x86_64_func_sym2:
+ movl $6, %eax
+ retq
+
+#--- codemap.s
+ .section .rdata,"dr"
+ .globl code_map
+code_map:
+ .rva arm64ec_func_sym + 1
+ .word 8
+ .rva x86_64_func_sym + 2
+ ...
[truncated]
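For readers who want the gist without walking through the whole patch, here is a minimal, self-contained C++ sketch of the two steps the change performs for EC code sections: a stable sort grouping chunks by architecture, followed by RVA assignment that inserts 0x1000-byte alignment whenever adjacent code chunks differ in architecture. It is an illustration only, not lld's actual code; the Chunk struct, Machine enum, and layoutCodeSection function below are invented for the example.

    // Standalone illustration (assumed, not lld code): group code chunks by
    // architecture and page-align the boundary between groups, mirroring
    // sortECChunks() and the assignAddresses() change in the patch above.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum class Machine { Unknown, Arm64, Arm64EC, Amd64 };

    struct Chunk {
      Machine machine;
      uint64_t size;
      uint64_t rva = 0;
    };

    // Round value up to the given power-of-two alignment.
    static uint64_t alignTo(uint64_t value, uint64_t align) {
      return (value + align - 1) & ~(align - 1);
    }

    static void layoutCodeSection(std::vector<Chunk> &chunks, uint64_t base) {
      // Step 1: stable sort so chunks of the same architecture end up contiguous.
      std::stable_sort(chunks.begin(), chunks.end(),
                       [](const Chunk &a, const Chunk &b) {
                         return a.machine < b.machine;
                       });

      // Step 2: assign RVAs, inserting 0x1000-byte alignment whenever two
      // adjacent chunks belong to different architectures.
      uint64_t rva = base;
      Machine prev = Machine::Unknown;
      for (Chunk &c : chunks) {
        if (c.machine != Machine::Unknown) {
          if (prev != Machine::Unknown && c.machine != prev)
            rva = alignTo(rva, 4096);
          prev = c.machine;
        }
        c.rva = rva;
        rva += c.size;
      }
    }

    int main() {
      // An x86_64 chunk followed by an ARM64EC chunk, as in arm64ec-codemap.test.
      std::vector<Chunk> chunks = {{Machine::Amd64, 6}, {Machine::Arm64EC, 8}};
      layoutCodeSection(chunks, 0x1000);
      for (const Chunk &c : chunks)
        std::printf("machine=%d rva=0x%llx size=%llu\n", (int)c.machine,
                    (unsigned long long)c.rva, (unsigned long long)c.size);
    }

Compiled and run, the sketch places the ARM64EC chunk at 0x1000 and the x86_64 chunk at 0x2000, matching the code map verified by the new arm64ec-codemap.test.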
Force-pushed from f2ee4f4 to 529e9f9.
Review comment on lld/COFF/Writer.cpp (now outdated):

    if (machine != IMAGE_FILE_MACHINE_UNKNOWN) {
      // We need additional alignment when crossing EC range boundaries.
      if (prevECMachine != IMAGE_FILE_MACHINE_UNKNOWN &&
          machine != prevECMachine)
        virtualSize = alignTo(virtualSize, 4096);
      prevECMachine = machine;
    }
I think that an early continue might simplify this:
Suggested change:

    if (prevECMachine == machine || machine == IMAGE_FILE_MACHINE_UNKNOWN)
      continue;
    // We need additional alignment when crossing EC range boundaries.
    virtualSize = alignTo(virtualSize, 4096);
    prevECMachine = machine;
We can't use continue here because we still need to run the rest of the loop. I looked at simplifying the IMAGE_FILE_MACHINE_UNKNOWN handling, which inspired me to do more MSVC testing and led me to a potentially interesting behavior: MSVC seems to preserve the fact that a chunk is a data chunk even when it's merged into a code section. I need to think more about it and run more tests; we may want to do something similar.
Force-pushed from 529e9f9 to 6314e78.
I ended up implementing preservation of data section information across merges and took that into account when sorting chunks in #70722. Those changes allowed simplifying this PR.
LGTM, this looks straightforward.
Force-pushed from 6314e78 to 8ab6da1.