-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: Clone all local addresses when importing dup
#72714
Conversation
Roslyn emits `dup` for the field address when compound assignment operators are used on struct fields. We would previously spill this address leading us to mark such structs as address exposed and disabling promotion. Also allow removing unnecessary casts in cases like ``` ASG LCL_FLD ubyte V00 CAST int <- ubyte <- int ... ``` we only allowed this cast removal for LCL_VAR and IND before, which led to unnecessary new casts in some cases with this change.
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch Issue DetailsRoslyn emits Also allow removing unnecessary casts in cases like
we only allowed this cast removal for LCL_VAR and IND before, which led
|
Example codegen diff for [MethodImpl(MethodImplOptions.NoInlining)]
static int Test(int its)
{
Foo f = new Foo();
for (int i = 0; i < its; i++)
{
f.A += f.B * i;
f.B = i;
}
return f.A + f.B;
}
struct Foo
{
public int A, B;
} is ; No PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T02] ( 4, 7 ) int -> rcx single-def
-; V01 loc0 [V01 ] ( 5, 14 ) struct ( 8) [rsp+00H] do-not-enreg[XS] must-init addr-exposed ld-addr-op
-; V02 loc1 [V02,T01] ( 6, 21 ) int -> rax
+; V00 arg0 [V00,T03] ( 4, 7 ) int -> rcx single-def
+;* V01 loc0 [V01 ] ( 0, 0 ) struct ( 8) zero-ref ld-addr-op
+; V02 loc1 [V02,T00] ( 5, 17 ) int -> r8
;# V03 OutArgs [V03 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
-; V04 tmp1 [V04,T00] ( 3, 24 ) byref -> rdx "dup spill"
-; V05 tmp2 [V05 ] ( 2, 5 ) int -> [rsp+00H] do-not-enreg[X] addr-exposed V01.A(offs=0x00) P-DEP "field V01.A (fldOffset=0x0)"
-; V06 tmp3 [V06 ] ( 3, 9 ) int -> [rsp+04H] do-not-enreg[X] addr-exposed V01.B(offs=0x04) P-DEP "field V01.B (fldOffset=0x4)"
+; V04 tmp1 [V04,T02] ( 4, 10 ) int -> rax V01.A(offs=0x00) P-INDEP "field V01.A (fldOffset=0x0)"
+; V05 tmp2 [V05,T01] ( 5, 14 ) int -> rdx V01.B(offs=0x04) P-INDEP "field V01.B (fldOffset=0x4)"
;
-; Lcl frame size = 8
+; Lcl frame size = 0
G_M46009_IG01:
- push rax
- xor eax, eax
- mov qword ptr [rsp], rax
- ;; size=7 bbWeight=1 PerfScore 2.25
+ ;; size=0 bbWeight=1 PerfScore 0.00
G_M46009_IG02:
xor eax, eax
+ xor edx, edx
+ xor r8d, r8d
test ecx, ecx
jle SHORT G_M46009_IG04
- align [3 bytes for IG03]
- ;; size=9 bbWeight=1 PerfScore 1.75
+ align [0 bytes for IG03]
+ ;; size=11 bbWeight=1 PerfScore 2.00
G_M46009_IG03:
- lea rdx, bword ptr [rsp]
- mov r8d, eax
- imul r8d, dword ptr [rsp+04H]
- add dword ptr [rdx], r8d
- mov dword ptr [rsp+04H], eax
- inc eax
- cmp eax, ecx
+ imul edx, r8d
+ add eax, edx
+ mov edx, r8d
+ lea r8d, [rdx+1]
+ cmp r8d, ecx
jl SHORT G_M46009_IG03
- ;; size=26 bbWeight=4 PerfScore 41.00
+ ;; size=18 bbWeight=4 PerfScore 17.00
G_M46009_IG04:
- mov eax, dword ptr [rsp]
- add eax, dword ptr [rsp+04H]
- ;; size=7 bbWeight=1 PerfScore 3.00
+ add eax, edx
+ ;; size=2 bbWeight=1 PerfScore 0.25
G_M46009_IG05:
- add rsp, 8
ret
- ;; size=5 bbWeight=1 PerfScore 1.25
+ ;; size=1 bbWeight=1 PerfScore 1.00 |
cc @dotnet/jit-contrib PTAL @EgorBo Spot checking the regressions it seems to be mostly because we do not support RMW ops for G_M33991_IG05: ; gcrefRegs=00000000 {}, byrefRegs=00000001 {rax}, byref
mov byte ptr [rax], dl
- lea rax, bword ptr [rsp]
- or byte ptr [rax], 64
mov eax, dword ptr [rsp]
; byrRegs -[rax]
- ;; size=12 bbWeight=1 PerfScore 5.50
+ movzx rax, al
+ or eax, 64
+ mov byte ptr [rsp], al
+ mov eax, dword ptr [rsp]
+ ;; size=17 bbWeight=1 PerfScore 4.50
G_M33991_IG06: ; , epilog, nogc, extend
add rsp, 8
ret This seems to happen rarely so I think we can wait and see if there are any significant regressions in the micro benchmarks. There are a lot of asp.net improvements due to changing the |
Improvements on |
Possible regressions: Possible improvements: |
Roslyn emits
dup
for the field address when compound assignmentoperators are used on struct fields. We would previously spill this
address leading us to mark such structs as address exposed and disabling
promotion.
Also allow removing unnecessary casts in cases like
we only allowed this cast removal for LCL_VAR and IND before, which led
to unnecessary new casts in some cases with this change.