forked from edef1c/libfringe
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathx86_64.rs
414 lines (387 loc) · 17.6 KB
/
x86_64.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
// This file is part of libfringe, a low-level green threading library.
// Copyright (c) edef <[email protected]>,
// whitequark <[email protected]>
// Amanieu d'Antras <[email protected]>
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
// To understand the code in this file, keep in mind these two facts:
// * x86_64 SysV C ABI has a "red zone": 128 bytes under the top of the stack
// that is defined to be unmolested by signal handlers, interrupts, etc.
// Leaf functions can use the red zone without adjusting rsp or rbp.
// * x86_64 SysV C ABI requires the stack to be aligned at function entry,
// so that (%rsp+8) is a multiple of 16. Aligned operands are a requirement
// of SIMD instructions, and making this the responsibility of the caller
// avoids having to maintain a frame pointer, which is necessary when
// a function has to realign the stack from an unknown state.
// * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi
// to pass a value while swapping context; this is an arbitrary choice
// (we clobber all registers and could use any of them) but this allows us
// to reuse the swap function to perform the initial call. We do the same
// thing with %rsi to pass the stack pointer to the new context.
//
// To understand the DWARF CFI code in this file, keep in mind these facts:
// * CFI is "call frame information"; a set of instructions to a debugger or
// an unwinder that allow it to simulate returning from functions. This implies
// restoring every register to its pre-call state, as well as the stack pointer.
// * CFA is "call frame address"; the value of stack pointer right before the call
// instruction in the caller. Everything strictly below CFA (and inclusive until
// the next CFA) is the call frame of the callee. This implies that the return
// address is the part of callee's call frame.
// * Logically, DWARF CFI is a table where rows are instruction pointer values and
// columns describe where registers are spilled (mostly using expressions that
// compute a memory location as CFA+n). A .cfi_offset pseudoinstruction changes
// the state of a column for all IP numerically larger than the one it's placed
// after. A .cfi_def_* pseudoinstruction changes the CFA value similarly.
// * Simulating return is as easy as restoring register values from the CFI table
// and then setting stack pointer to CFA.
//
// A high-level overview of the function of the trampolines when unwinding is:
// * The 2nd init trampoline puts a controlled value (written in swap to `new_cfa`)
// into %rbp. This is then used as the CFA for the 1st trampoline.
// * This controlled value points to the bottom of the stack of the parent context,
// which holds the saved %rbp and return address from the call to swap().
// * The 1st init trampoline tells the unwinder to restore %rbp and its return
// address from the stack frame at %rbp (in the parent stack), thus continuing
// unwinding at the swap call site instead of falling off the end of context stack.
use crate::{arch::StackPointer, unwind};
use core::ptr::NonNull;
pub const STACK_ALIGNMENT: usize = 16;
pub unsafe fn init(
stack_base: *mut u8,
f: unsafe extern "C" fn(usize, StackPointer),
) -> StackPointer {
#[cfg(not(target_vendor = "apple"))]
#[allow(named_asm_labels)]
#[naked]
unsafe extern "C" fn trampoline_1() {
asm!(
// gdb has a hardcoded check that rejects backtraces where frame addresses
// do not monotonically decrease. It is turned off if the function is called
// "__morestack" and that is hardcoded. So, to make gdb backtraces match
// the actual unwinder behavior, we call ourselves "__morestack" and mark
// the symbol as local; it shouldn't interfere with anything.
"__morestack:",
".local __morestack",
// Set up the first part of our DWARF CFI linking stacks together. When
// we reach this function from unwinding, %rbp will be pointing at the bottom
// of the parent linked stack. This link is set each time swap() is called.
// When unwinding the frame corresponding to this function, a DWARF unwinder
// will use %rbp+16 as the next call frame address, restore return address
// from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
// of `swap_trampoline` does.
".cfi_def_cfa rbp, 16",
".cfi_offset rbp, -16",
// This nop is here so that the initial swap doesn't return to the start
// of the trampoline, which confuses the unwinder since it will look for
// frame information in the previous symbol rather than this one. It is
// never actually executed.
"nop",
// Stack unwinding in some versions of libunwind doesn't seem to like
// 1-byte symbols, so we add a second nop here. This instruction isn't
// executed either, it is only here to pad the symbol size.
"nop",
".Lend:",
".size __morestack, .Lend-__morestack",
options(noreturn)
);
}
#[cfg(target_vendor = "apple")]
#[naked]
unsafe extern "C" fn trampoline_1() {
asm!(
// Identical to the above, except avoids .local/.size that aren't available on Mach-O.
"__morestack:",
".private_extern __morestack",
".cfi_def_cfa rbp, 16",
".cfi_offset rbp, -16",
"nop",
"nop",
options(noreturn),
)
}
#[naked]
unsafe extern "C" fn trampoline_2() {
asm!(
// Set up the second part of our DWARF CFI.
// When unwinding the frame corresponding to this function, a DWARF unwinder
// will restore %rbp (and thus CFA of the first trampoline) from the stack slot.
// This stack slot is updated every time swap() is called to point to the bottom
// of the stack of the context switch just switched from.
".cfi_def_cfa rbp, 16",
".cfi_offset rbp, -16",
// This nop is here so that the return address of the swap trampoline
// doesn't point to the start of the symbol. This confuses gdb's backtraces,
// causing them to think the parent function is trampoline_1 instead of
// trampoline_2.
"nop",
// Call unwind_wrapper with the provided function and the stack base address.
"lea rdx, [rsp + 32]",
"mov rcx, [rsp + 16]",
"call {0}",
// Restore the stack pointer of the parent context. No CFI adjustments
// are needed since we have the same stack frame as trampoline_1.
"mov rsp, [rsp]",
// Restore frame pointer of the parent context.
"pop rbp",
".cfi_adjust_cfa_offset -8",
".cfi_restore rbp",
// If the returned value is nonzero, trigger an unwind in the parent
// context with the given exception object.
"mov rdi, rax",
"test rax, rax",
"jnz {1}",
// Clear the stack pointer. We can't call into this context any more once
// the function has returned.
"xor rsi, rsi",
// Return into the parent context. Use `pop` and `jmp` instead of a `ret`
// to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
"pop rax",
".cfi_adjust_cfa_offset -8",
".cfi_register rip, rax",
"jmp rax",
sym unwind::unwind_wrapper,
sym unwind::start_unwind,
options(noreturn),
);
}
// We set up the stack in a somewhat special way so that to the unwinder it
// looks like trampoline_1 has called trampoline_2, which has in turn called
// swap::trampoline.
//
// There are 2 call frames in this setup, each containing the return address
// followed by the %rbp value for that frame. This setup supports unwinding
// using DWARF CFI as well as the frame pointer-based unwinding used by tools
// such as perf or dtrace.
let mut sp = StackPointer::new(stack_base);
sp.push(0 as usize); // Padding to ensure the stack is properly aligned
sp.push(f as usize); // Function that trampoline_2 should call
// Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
// each time a context switch is performed.
sp.push(trampoline_1 as usize + 2); // Return after the 2 nops
sp.push(0xdeaddeaddead0cfa); // CFA slot
// Call frame for swap::trampoline. We set up the %rbp value to point to the
// parent call frame.
let frame = sp.offset(0);
sp.push(trampoline_2 as usize + 1); // Entry point, skip initial nop
sp.push(frame as usize); // Pointer to parent call frame
sp
}
#[inline(always)]
pub unsafe fn swap_link(
arg: usize,
new_sp: StackPointer,
new_stack_base: *mut u8,
) -> (usize, Option<StackPointer>) {
let mut ret: usize;
let mut ret_sp: *mut usize;
asm!(
// FIXME(cynecx): figure out correct cfi directives.
// Save `rbx` because we can't use `rbx` in a clobber because it's reserved by llvm.
// "push rbx",
// ".cfi_adjust_cfa_offset 8",
// ".cfi_rel_offset rbx, 0",
// Push the return address
"lea rax, [rip + 0f]",
"push rax",
// Save frame pointer explicitly; the unwinder uses it to find CFA of
// the caller, and so it has to have the correct value immediately after
// the call instruction that invoked the trampoline.
"push rbp",
// Link the call stacks together by writing the current stack bottom
// address to the CFA slot in the new stack.
"mov [rcx - 32], rsp",
// Pass the stack pointer of the old context to the new one.
"mov rsi, rsp",
// Load stack pointer of the new context.
"mov rsp, rdx",
// Restore frame pointer of the new context.
"pop rbp",
// Return into the new context. Use `pop` and `jmp` instead of a `ret`
// to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
"pop rax",
"jmp rax",
// Reentry
"0:",
// FIXME(cynecx): figure out correct cfi directives.
// Restore `rbx` which we've saved before because we can't use it as a clobber.
// "pop rbx",
// ".cfi_adjust_cfa_offset -8",
// ".cfi_restore rbx",
// Outputs
lateout("rdi") ret,
lateout("rsi") ret_sp,
// Inputs
in("rdi") arg,
in("rdx") new_sp.offset(0),
in("rcx") new_stack_base,
// Clobbers
out("rax") _, out("rbx") _, lateout("rcx") _, lateout("rdx") _,
out("r8") _, out("r9") _, out("r10") _, out("r11") _,
out("r12") _, out("r13") _, out("r14") _, out("r15") _,
out("mm0") _, out("mm1") _, out("mm2") _, out("mm3") _,
out("mm4") _, out("mm5") _, out("mm6") _, out("mm7") _,
out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _,
out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _,
out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _,
out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _,
out("xmm16") _, out("xmm17") _, out("xmm18") _, out("xmm19") _,
out("xmm20") _, out("xmm21") _, out("xmm22") _, out("xmm23") _,
out("xmm24") _, out("xmm25") _, out("xmm26") _, out("xmm27") _,
out("xmm28") _, out("xmm29") _, out("xmm30") _, out("xmm31") _,
/* Options:
rustc emits the following clobbers,
- by *not* specifying `options(preserves_flags)`:
(x86) ~{dirflag},~{flags},~{fpsr}
(ARM/AArch64) ~{cc}
- by *not* specifying `options(nomem)`:
~{memory}
- by *not* specifying `nostack`:
alignstack
*/
options(unwind)
);
(ret, NonNull::new(ret_sp).map(StackPointer::from))
}
#[inline(always)]
pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) {
// This is identical to swap_link, but without the write to the CFA slot.
let mut ret: usize;
let mut ret_sp: *mut usize;
asm!(
// FIXME(cynecx): figure out correct cfi directives.
// Save `rbx` because we can't use `rbx` in a clobber because it's reserved by llvm.
// "push rbx",
// ".cfi_adjust_cfa_offset 8",
// ".cfi_rel_offset rbx, 0",
// Push the return address
"lea rax, [rip + 0f]",
"push rax",
// Save frame pointer explicitly; the unwinder uses it to find CFA of
// the caller, and so it has to have the correct value immediately after
// the call instruction that invoked the trampoline.
"push rbp",
// Pass the stack pointer of the old context to the new one.
"mov rsi, rsp",
// Load stack pointer of the new context.
"mov rsp, rdx",
// Restore frame pointer of the new context.
"pop rbp",
// Return into the new context. Use `pop` and `jmp` instead of a `ret`
// to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
"pop rax",
"jmp rax",
// Reentry
"0:",
// FIXME(cynecx): figure out correct cfi directives.
// Restore `rbx` which we've saved before because we can't use it as a clobber.
// "pop rbx",
// ".cfi_adjust_cfa_offset -8",
// ".cfi_restore rbx",
//
// Outputs
lateout("rdi") ret,
lateout("rsi") ret_sp,
// Inputs
in("rdi") arg,
in("rdx") new_sp.offset(0),
// Clobbers
out("rax") _, out("rbx") _, out("rcx") _, lateout("rdx") _,
out("r8") _, out("r9") _, out("r10") _, out("r11") _,
out("r12") _, out("r13") _, out("r14") _, out("r15") _,
out("mm0") _, out("mm1") _, out("mm2") _, out("mm3") _,
out("mm4") _, out("mm5") _, out("mm6") _, out("mm7") _,
out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _,
out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _,
out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _,
out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _,
out("xmm16") _, out("xmm17") _, out("xmm18") _, out("xmm19") _,
out("xmm20") _, out("xmm21") _, out("xmm22") _, out("xmm23") _,
out("xmm24") _, out("xmm25") _, out("xmm26") _, out("xmm27") _,
out("xmm28") _, out("xmm29") _, out("xmm30") _, out("xmm31") _,
/* Options:
rustc emits the following clobbers,
- by *not* specifying `options(preserves_flags)`:
(x86) ~{dirflag},~{flags},~{fpsr}
(ARM/AArch64) ~{cc}
- by *not* specifying `options(nomem)`:
~{memory}
- by *not* specifying `nostack`:
alignstack
*/
options(unwind)
);
(ret, StackPointer::new(ret_sp))
}
#[inline(always)]
pub unsafe fn unwind(new_sp: StackPointer, new_stack_base: *mut u8) {
// Argument to pass to start_unwind, based on the stack base address.
let arg = unwind::unwind_arg(new_stack_base);
// This is identical to swap_link, except that it performs a tail call to
// start_unwind instead of returning into the target context.
asm!(
// FIXME(cynecx): figure out correct cfi directives.
// Save `rbx` because we can't use `rbx` in a clobber because it's reserved by llvm.
// "push rbx",
// ".cfi_adjust_cfa_offset 8",
// ".cfi_rel_offset rbx, 0",
// Push the return address
"lea rax, [rip + 0f]",
"push rax",
// Save frame pointer explicitly; the unwinder uses it to find CFA of
// the caller, and so it has to have the correct value immediately after
// the call instruction that invoked the trampoline.
"push rbp",
// Link the call stacks together by writing the current stack bottom
// address to the CFA slot in the new stack.
"mov [rcx - 32], rsp",
// Load stack pointer of the new context.
"mov rsp, rdx",
// Restore frame pointer of the new context.
"pop rbp",
// Jump to the start_unwind function, which will force a stack unwind in
// the target context. This will eventually return to us through the
// stack link.
"jmp {0}",
// Reentry
"0:",
// FIXME(cynecx): figure out correct cfi directives.
// Restore `rbx` which we've saved before because we can't use it as a clobber.
// "pop rbx",
// ".cfi_adjust_cfa_offset -8",
// ".cfi_restore rbx",
// Symbols
sym unwind::start_unwind,
// Inputs
in("rdi") arg,
in("rdx") new_sp.offset(0),
in("rcx") new_stack_base,
// Clobbers
out("rax") _, out("rbx") _, lateout("rcx") _, lateout("rdx") _,
lateout("rdi") _, lateout("rsi") _,
out("r8") _, out("r9") _, out("r10") _, out("r11") _,
out("r12") _, out("r13") _, out("r14") _, out("r15") _,
out("mm0") _, out("mm1") _, out("mm2") _, out("mm3") _,
out("mm4") _, out("mm5") _, out("mm6") _, out("mm7") _,
out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _,
out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _,
out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _,
out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _,
out("xmm16") _, out("xmm17") _, out("xmm18") _, out("xmm19") _,
out("xmm20") _, out("xmm21") _, out("xmm22") _, out("xmm23") _,
out("xmm24") _, out("xmm25") _, out("xmm26") _, out("xmm27") _,
out("xmm28") _, out("xmm29") _, out("xmm30") _, out("xmm31") _,
/* Options:
rustc emits the following clobbers,
- by *not* specifying `options(preserves_flags)`:
(x86) ~{dirflag},~{flags},~{fpsr}
(ARM/AArch64) ~{cc}
- by *not* specifying `options(nomem)`:
~{memory}
- by *not* specifying `nostack`:
alignstack
*/
options(unwind)
);
}