Revert "x86/retpoline: Simplify vmexit_fill_RSB()"
This reverts commit 1dde741. By putting
the RSB filling out of line and calling it, we waste one RSB slot for
returning from the function itself, which means one fewer actual function
call we can make if we're doing the Skylake abomination of call-depth
counting.

It also changed the number of RSB stuffings we do on vmexit from 32,
which was correct, to 16. Let's just stop with the bikeshedding; it
didn't actually *fix* anything anyway.
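
For illustration, a minimal sketch (hypothetical call site, not code from this commit) of the slot accounting described above: the out-of-line helper's own 'ret' immediately consumes one of the RSB entries it has just stuffed.

    # Hypothetical flow, for illustration only:
        call __clear_rsb        # out-of-line helper stuffs the RSB
        ...
    __clear_rsb:
        # ... overwrite all 32 RSB entries with speculation traps ...
        ret                     # pops one freshly-stuffed entry, leaving
                                # only 31 'safe' entries for the caller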

Signed-off-by: David Woodhouse <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
dwmw2 authored and Ingo Molnar committed Feb 20, 2018
1 parent 8554004 commit d1c9910
Showing 6 changed files with 65 additions and 71 deletions.
3 changes: 1 addition & 2 deletions arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %ebx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */
3 changes: 1 addition & 2 deletions arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %rbx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */
3 changes: 0 additions & 3 deletions arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */
70 changes: 63 additions & 7 deletions arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;				\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -78,10 +122,17 @@
 #endif
 .endm
 
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+/*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-	ALTERNATIVE "", "call __clear_rsb", \ftr
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
+		\ftr
+.Lskip_rsb_\@:
 #endif
 .endm
 
@@ -156,10 +207,15 @@ extern char __indirect_thunk_end[];
 static inline void vmexit_fill_RSB(void)
 {
 #ifdef CONFIG_RETPOLINE
-	alternative_input("",
-			  "call __fill_rsb",
-			  X86_FEATURE_RETPOLINE,
-			  ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+	unsigned long loops;
+
+	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+		      ALTERNATIVE("jmp 910f",
+				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+				  X86_FEATURE_RETPOLINE)
+		      "910:"
+		      : "=r" (loops), ASM_CALL_CONSTRAINT
+		      : : "memory" );
 #endif
 }
 
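For reference, a standalone sketch of the two-calls-per-iteration pattern that the __FILL_RETURN_BUFFER macro above expands to, assuming 64-bit GNU assembler and nr = 4 (so nr/2 = 2 passes); this is illustrative, not the kernel macro itself:

            mov     $2, %rbx                # nr/2 loop passes
    1:      call    2f                      # push RSB entry #1 of this pass
    3:      pause; lfence; jmp 3b           # speculation trap behind call #1
    2:      call    4f                      # push RSB entry #2 of this pass
    5:      pause; lfence; jmp 5b           # speculation trap behind call #2
    4:      dec     %rbx
            jnz     1b
            add     $(8*4), %rsp            # drop the 4 return addresses (8 bytes each)

Each 'call' leaves an RSB entry pointing at an infinite pause/lfence/jmp loop, so a speculative 'ret' that consumes it is safely captured; the final 'add' unwinds the real stack so architectural state is unchanged.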
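The reworked vmexit_fill_RSB() above also drops the hard %ebx/%rbx clobber of the reverted __fill_rsb helper: the dummy "=r" output lets the compiler pick a scratch register for the loop counter. A small userspace illustration of that constraint trick (plain GCC/Clang inline asm, an assumed sketch rather than kernel code):

    #include <stdio.h>

    int main(void)
    {
            unsigned long scratch;  /* never read; just reserves a register */

            /* The "=r" dummy output plays the role of 'loops' in
             * vmexit_fill_RSB(): a compiler-chosen scratch register. */
            asm volatile("mov $16, %0\n\t"
                         "1: dec %0\n\t"
                         "jnz 1b"
                         : "=r" (scratch)
                         : : "memory");
            puts("done");
            return 0;
    }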
1 change: 0 additions & 1 deletion arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
56 changes: 0 additions & 56 deletions arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
 
 .macro THUNK reg
 	.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
 GENERATE_THUNK(r14)
 GENERATE_THUNK(r15)
 #endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
-	mov	$(\nr / 2), %_ASM_BX
-	.align 16
-771:
-	call	772f
-773:	/* speculation trap */
-	pause
-	lfence
-	jmp	773b
-	.align 16
-772:
-	call	774f
-775:	/* speculation trap */
-	pause
-	lfence
-	jmp	775b
-	.align 16
-774:
-	dec	%_ASM_BX
-	jnz	771b
-	add	$((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS		16	/* To avoid underflow */
-
-ENTRY(__fill_rsb)
-	STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
-	ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
-	STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
-	ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
