-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify and optimize Math(F).Round #98186
Merged
Merged
+78
−132
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ghost
added
the
community-contribution
Indicates that the PR has been added by a community member
label
Feb 8, 2024
Tagging subscribers to this area: @dotnet/area-system-numerics
Issue Details
Simplifies and optimizes parameterized Round overloads.
Code:
internal static class RoundDisasm
{
// Thin wrappers, one per Round call shape, so that each shape gets its own JIT listing.
// Every wrapper is declared to return double; the float overloads therefore widen the
// MathF.Round result on return (the vcvtss2sd instruction in the float listings).

// Constant midpoint mode: AwayFromZero.
public static double RoundAway(double d) => Math.Round(d, MidpointRounding.AwayFromZero);
public static double RoundAway(float f) => MathF.Round(f, MidpointRounding.AwayFromZero);
// Constant midpoint mode: ToZero.
public static double RoundTruncate(double d) => Math.Round(d, MidpointRounding.ToZero);
public static double RoundTruncate(float f) => MathF.Round(f, MidpointRounding.ToZero);
// Midpoint mode supplied by the caller; the listings show this parameter as int.
public static double Round(double d, MidpointRounding m) => Math.Round(d, m);
public static double Round(float f, MidpointRounding m) => MathF.Round(f, m);
// Constant digit count (2), no explicit midpoint mode.
public static double Round2(double d) => Math.Round(d, 2);
public static double Round2(float f) => MathF.Round(f, 2);
// Constant digit count (4) combined with a constant midpoint mode (ToZero).
public static double Round4(double d) => Math.Round(d, 4, MidpointRounding.ToZero);
public static double Round4(float f) => MathF.Round(f, 4, MidpointRounding.ToZero);
}
.NET 8
; Assembly listing for method RoundDisasm:RoundAway(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0003
C5F828C8 vmovaps xmm1, xmm0
C5F8101521000000 vmovups xmm2, xmmword ptr [reloc @RWD00]
C5F154CA vandpd xmm1, xmm1, xmm2
62F1F518560D23000000 vorpd xmm1, xmm1, qword ptr [reloc @RWD16] {1to2}
C5F358C0 vaddsd xmm0, xmm1, xmm0
C4E3790BC00B vroundsd xmm0, xmm0, xmm0, 11
G_M000_IG03: ;; offset=0x0027
C3 ret
RWD00 dq 8000000000000000h, 8000000000000000h
RWD16 dq 3FDFFFFFFFFFFFFFh ; 0.5
; Total bytes of code 40
; Assembly listing for method RoundDisasm:RoundAway(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0003
C5F828C8 vmovaps xmm1, xmm0
C5F8101521000000 vmovups xmm2, xmmword ptr [reloc @RWD00]
C5F054CA vandps xmm1, xmm1, xmm2
62F17418560D23000000 vorps xmm1, xmm1, dword ptr [reloc @RWD16] {1to4}
C5F258C0 vaddss xmm0, xmm1, xmm0
C4E3790AC00B vroundss xmm0, xmm0, xmm0, 11
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
G_M000_IG03: ;; offset=0x002B
C3 ret
RWD00 dq 8000000080000000h, 8000000080000000h
RWD16 dd 3EFFFFFFh ; 0.5
; Total bytes of code 44
; Assembly listing for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0003
33D2 xor edx, edx
41B802000000 mov r8d, 2
G_M000_IG03: ;; offset=0x000B
FF259F041E00 tail.jmp [System.Math:Round(double,int,int):double]
; Total bytes of code 17
; Assembly listing for method RoundDisasm:RoundTruncate(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
4883EC28 sub rsp, 40
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0007
33D2 xor edx, edx
41B802000000 mov r8d, 2
FF15F38F1E00 call [System.MathF:Round(float,int,int):float]
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
G_M000_IG03: ;; offset=0x0019
4883C428 add rsp, 40
C3 ret
; Total bytes of code 30
; Assembly listing for method RoundDisasm:Round(double,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0003
448BC2 mov r8d, edx
33D2 xor edx, edx
G_M000_IG03: ;; offset=0x0008
FF2532041E00 tail.jmp [System.Math:Round(double,int,int):double]
; Total bytes of code 14
; Assembly listing for method RoundDisasm:Round(float,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
4883EC28 sub rsp, 40
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0007
448BC2 mov r8d, edx
33D2 xor edx, edx
FF15968F1E00 call [System.MathF:Round(float,int,int):float]
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
G_M000_IG03: ;; offset=0x0016
4883C428 add rsp, 40
C3 ret
; Total bytes of code 27
; Assembly listing for method RoundDisasm:Round2(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0003
BA02000000 mov edx, 2
4533C0 xor r8d, r8d
G_M000_IG03: ;; offset=0x000B
FF25DF031E00 tail.jmp [System.Math:Round(double,int,int):double]
; Total bytes of code 17
; Assembly listing for method RoundDisasm:Round2(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
G_M000_IG01: ;; offset=0x0000
4883EC28 sub rsp, 40
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0007
BA02000000 mov edx, 2
4533C0 xor r8d, r8d
FF15338F1E00 call [System.MathF:Round(float,int,int):float]
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
G_M000_IG03: ;; offset=0x0019
4883C428 add rsp, 40
C3 ret
; Total bytes of code 30
; Assembly listing for method RoundDisasm:Round4(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
G_M000_IG01: ;; offset=0x0000
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0003
BA04000000 mov edx, 4
41B802000000 mov r8d, 2
G_M000_IG03: ;; offset=0x000E
FF256C031E00 tail.jmp [System.Math:Round(double,int,int):double]
; Total bytes of code 20
; Assembly listing for method RoundDisasm:Round4(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
G_M000_IG01: ;; offset=0x0000
4883EC28 sub rsp, 40
C5F877 vzeroupper
G_M000_IG02: ;; offset=0x0007
BA04000000 mov edx, 4
41B802000000 mov r8d, 2
FF15C08E1E00 call [System.MathF:Round(float,int,int):float]
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
G_M000_IG03: ;; offset=0x001C
4883C428 add rsp, 40
C3 ret
; Total bytes of code 33
This branch
; Assembly listing for method RoundDisasm:RoundAway(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 4, 4 ) double -> mm0 single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 2 ) double -> mm0 "Inline return value spill temp"
;* V03 tmp2 [V03 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V04 tmp3 [V04 ] ( 0, 0 ) double -> zero-ref "Inline return value spill temp"
;* V05 tmp4 [V05 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 0
G_M25588_IG01: ;; offset=0x0000
C5F877 vzeroupper
;; size=3 bbWeight=1 PerfScore 1.00
G_M25588_IG02: ;; offset=0x0003
C5F8100D25000000 vmovups xmm1, xmmword ptr [reloc @RWD00]
C5F828D0 vmovaps xmm2, xmm0
62F3ED08250D26000000CA vpternlogq xmm1, xmm2, xmmword ptr [reloc @RWD16], -54
C5F358C0 vaddsd xmm0, xmm1, xmm0
C4E3790BC00B vroundsd xmm0, xmm0, xmm0, 11
;; size=33 bbWeight=1 PerfScore 15.25
G_M25588_IG03: ;; offset=0x0024
C3 ret
;; size=1 bbWeight=1 PerfScore 1.00
RWD00 dq 8000000000000000h, 8000000000000000h
RWD16 dq 3FDFFFFFFFFFFFFFh, 3FDFFFFFFFFFFFFFh
; Total bytes of code 37, prolog size 3, PerfScore 17.25, instruction count 7, allocated bytes for code 37 (MethodHash=318b9c0b) for method RoundDisasm:RoundAway(double):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:RoundAway(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 4, 4 ) float -> mm0 single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 2 ) float -> mm0 "Inline return value spill temp"
;* V03 tmp2 [V03 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V04 tmp3 [V04 ] ( 0, 0 ) float -> zero-ref "Inline return value spill temp"
;* V05 tmp4 [V05 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 0
G_M34641_IG01: ;; offset=0x0000
C5F877 vzeroupper
;; size=3 bbWeight=1 PerfScore 1.00
G_M34641_IG02: ;; offset=0x0003
C5F8100D25000000 vmovups xmm1, xmmword ptr [reloc @RWD00]
C5F828D0 vmovaps xmm2, xmm0
62F36D08250D26000000CA vpternlogd xmm1, xmm2, xmmword ptr [reloc @RWD16], -54
C5F258C0 vaddss xmm0, xmm1, xmm0
C4E3790AC00B vroundss xmm0, xmm0, xmm0, 11
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
;; size=37 bbWeight=1 PerfScore 19.25
G_M34641_IG03: ;; offset=0x0028
C3 ret
;; size=1 bbWeight=1 PerfScore 1.00
RWD00 dq 8000000080000000h, 8000000080000000h
RWD16 dq 3EFFFFFF3EFFFFFFh, 3EFFFFFF3EFFFFFFh
; Total bytes of code 41, prolog size 3, PerfScore 21.25, instruction count 8, allocated bytes for code 41 (MethodHash=f9e078ae) for method RoundDisasm:RoundAway(float):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 3, 3 ) double -> mm0 single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 2 ) double -> mm0 "Inline return value spill temp"
;* V03 tmp2 [V03 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V04 tmp3 [V04 ] ( 0, 0 ) double -> zero-ref "Inline return value spill temp"
;* V05 tmp4 [V05 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 0
G_M39156_IG01: ;; offset=0x0000
C5F877 vzeroupper
;; size=3 bbWeight=1 PerfScore 1.00
G_M39156_IG02: ;; offset=0x0003
C4E3790BC00B vroundsd xmm0, xmm0, xmm0, 11
;; size=6 bbWeight=1 PerfScore 7.00
G_M39156_IG03: ;; offset=0x0009
C3 ret
;; size=1 bbWeight=1 PerfScore 1.00
; Total bytes of code 10, prolog size 3, PerfScore 9.00, instruction count 3, allocated bytes for code 10 (MethodHash=6fe1670b) for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:RoundTruncate(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 3, 3 ) float -> mm0 single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 2 ) float -> mm0 "Inline return value spill temp"
;* V03 tmp2 [V03 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V04 tmp3 [V04 ] ( 0, 0 ) float -> zero-ref "Inline return value spill temp"
;* V05 tmp4 [V05 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 0
G_M7249_IG01: ;; offset=0x0000
C5F877 vzeroupper
;; size=3 bbWeight=1 PerfScore 1.00
G_M7249_IG02: ;; offset=0x0003
C4E3790AC00B vroundss xmm0, xmm0, xmm0, 11
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
;; size=10 bbWeight=1 PerfScore 11.00
G_M7249_IG03: ;; offset=0x000D
C3 ret
;; size=1 bbWeight=1 PerfScore 1.00
; Total bytes of code 14, prolog size 3, PerfScore 13.00, instruction count 4, allocated bytes for code 14 (MethodHash=8f28e3ae) for method RoundDisasm:RoundTruncate(float):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:Round(double,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 8, 5 ) double -> mm0 single-def
; V01 arg1 [V01,T00] ( 5, 4 ) int -> rbx single-def
; V02 OutArgs [V02 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V03 tmp1 [V03,T02] ( 6, 3.50) double -> mm0 "Inline return value spill temp"
;* V04 tmp2 [V04 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V05 tmp3 [V05 ] ( 0, 0 ) double -> zero-ref "Inline return value spill temp"
; V06 tmp4 [V06,T03] ( 2, 0 ) ref -> rdx single-def "argument with side effect"
;
; Lcl frame size = 32
G_M6149_IG01: ;; offset=0x0000
53 push rbx
4883EC20 sub rsp, 32
C5F877 vzeroupper
8BDA mov ebx, edx
;; size=10 bbWeight=1 PerfScore 2.50
G_M6149_IG02: ;; offset=0x000A
83FB04 cmp ebx, 4
775F ja SHORT G_M6149_IG09
8BCB mov ecx, ebx
488D1578000000 lea rdx, [reloc @RWD00]
8B148A mov edx, dword ptr [rdx+4*rcx]
488D05E8FFFFFF lea rax, G_M6149_IG02
4803D0 add rdx, rax
FFE2 jmp rdx
;; size=29 bbWeight=1 PerfScore 7.25
G_M6149_IG03: ;; offset=0x0027
C5F8100D81000000 vmovups xmm1, xmmword ptr [reloc @RWD32]
C5F828D0 vmovaps xmm2, xmm0
62F3ED08250D82000000CA vpternlogq xmm1, xmm2, xmmword ptr [reloc @RWD48], -54
C5F358C0 vaddsd xmm0, xmm1, xmm0
C4E3790BC00B vroundsd xmm0, xmm0, xmm0, 11
EB1E jmp SHORT G_M6149_IG08
;; size=35 bbWeight=0.50 PerfScore 8.62
G_M6149_IG04: ;; offset=0x004A
C4E3790BC004 vroundsd xmm0, xmm0, xmm0, 4
EB16 jmp SHORT G_M6149_IG08
;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG05: ;; offset=0x0052
C4E3790BC00B vroundsd xmm0, xmm0, xmm0, 11
EB0E jmp SHORT G_M6149_IG08
;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG06: ;; offset=0x005A
C4E3790BC009 vroundsd xmm0, xmm0, xmm0, 9
EB06 jmp SHORT G_M6149_IG08
;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG07: ;; offset=0x0062
C4E3790BC00A vroundsd xmm0, xmm0, xmm0, 10
;; size=6 bbWeight=0.50 PerfScore 3.50
G_M6149_IG08: ;; offset=0x0068
4883C420 add rsp, 32
5B pop rbx
C3 ret
;; size=6 bbWeight=1 PerfScore 1.75
G_M6149_IG09: ;; offset=0x006E
B9840B0000 mov ecx, 0xB84
48BA004022BEFA7F0000 mov rdx, 0x7FFABE224000
E88EE44E5F call CORINFO_HELP_STRCNS
488BD0 mov rdx, rax
8BCB mov ecx, ebx
FF1533947C00 call [System.ThrowHelper:ThrowArgumentException_InvalidEnumValue[int](int,System.String)]
CC int3
;; size=32 bbWeight=0 PerfScore 0.00
RWD00 dd 00000040h ; case G_M6149_IG04
dd 0000001Dh ; case G_M6149_IG03
dd 00000048h ; case G_M6149_IG05
dd 00000050h ; case G_M6149_IG06
dd 00000058h ; case G_M6149_IG07
RWD20 dd 00000000h, 00000000h, 00000000h
RWD32 dq 8000000000000000h, 8000000000000000h
RWD48 dq 3FDFFFFFFFFFFFFFh, 3FDFFFFFFFFFFFFFh
; Total bytes of code 142, prolog size 8, PerfScore 37.12, instruction count 35, allocated bytes for code 142 (MethodHash=d24ae7fa) for method RoundDisasm:Round(double,int):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:Round(float,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 8, 5 ) float -> mm0 single-def
; V01 arg1 [V01,T00] ( 5, 4 ) int -> rbx single-def
; V02 OutArgs [V02 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V03 tmp1 [V03,T02] ( 6, 3.50) float -> mm0 "Inline return value spill temp"
;* V04 tmp2 [V04 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V05 tmp3 [V05 ] ( 0, 0 ) float -> zero-ref "Inline return value spill temp"
; V06 tmp4 [V06,T03] ( 2, 0 ) ref -> rdx single-def "argument with side effect"
;
; Lcl frame size = 32
G_M39904_IG01: ;; offset=0x0000
53 push rbx
4883EC20 sub rsp, 32
C5F877 vzeroupper
8BDA mov ebx, edx
;; size=10 bbWeight=1 PerfScore 2.50
G_M39904_IG02: ;; offset=0x000A
83FB04 cmp ebx, 4
7763 ja SHORT G_M39904_IG10
8BCB mov ecx, ebx
488D1588000000 lea rdx, [reloc @RWD00]
8B148A mov edx, dword ptr [rdx+4*rcx]
488D05E8FFFFFF lea rax, G_M39904_IG02
4803D0 add rdx, rax
FFE2 jmp rdx
;; size=29 bbWeight=1 PerfScore 7.25
G_M39904_IG03: ;; offset=0x0027
C5F8100D91000000 vmovups xmm1, xmmword ptr [reloc @RWD32]
C5F828D0 vmovaps xmm2, xmm0
62F36D08250D92000000CA vpternlogd xmm1, xmm2, xmmword ptr [reloc @RWD48], -54
C5F258C0 vaddss xmm0, xmm1, xmm0
C4E3790AC00B vroundss xmm0, xmm0, xmm0, 11
EB1E jmp SHORT G_M39904_IG08
;; size=35 bbWeight=0.50 PerfScore 8.62
G_M39904_IG04: ;; offset=0x004A
C4E3790AC004 vroundss xmm0, xmm0, xmm0, 4
EB16 jmp SHORT G_M39904_IG08
;; size=8 bbWeight=0.50 PerfScore 4.50
G_M39904_IG05: ;; offset=0x0052
C4E3790AC00B vroundss xmm0, xmm0, xmm0, 11
EB0E jmp SHORT G_M39904_IG08
;; size=8 bbWeight=0.50 PerfScore 4.50
G_M39904_IG06: ;; offset=0x005A
C4E3790AC009 vroundss xmm0, xmm0, xmm0, 9
EB06 jmp SHORT G_M39904_IG08
;; size=8 bbWeight=0.50 PerfScore 4.50
G_M39904_IG07: ;; offset=0x0062
C4E3790AC00A vroundss xmm0, xmm0, xmm0, 10
;; size=6 bbWeight=0.50 PerfScore 3.50
G_M39904_IG08: ;; offset=0x0068
C5FA5AC0 vcvtss2sd xmm0, xmm0, xmm0
;; size=4 bbWeight=1 PerfScore 4.00
G_M39904_IG09: ;; offset=0x006C
4883C420 add rsp, 32
5B pop rbx
C3 ret
;; size=6 bbWeight=1 PerfScore 1.75
G_M39904_IG10: ;; offset=0x0072
B9840B0000 mov ecx, 0xB84
48BA004022BEFA7F0000 mov rdx, 0x7FFABE224000
E89AE34E5F call CORINFO_HELP_STRCNS
488BD0 mov rdx, rax
8BCB mov ecx, ebx
FF153F937C00 call [System.ThrowHelper:ThrowArgumentException_InvalidEnumValue[int](int,System.String)]
CC int3
;; size=32 bbWeight=0 PerfScore 0.00
RWD00 dd 00000040h ; case G_M39904_IG04
dd 0000001Dh ; case G_M39904_IG03
dd 00000048h ; case G_M39904_IG05
dd 00000050h ; case G_M39904_IG06
dd 00000058h ; case G_M39904_IG07
RWD20 dd 00000000h, 00000000h, 00000000h
RWD32 dq 8000000080000000h, 8000000080000000h
RWD48 dq 3EFFFFFF3EFFFFFFh, 3EFFFFFF3EFFFFFFh
; Total bytes of code 146, prolog size 8, PerfScore 41.12, instruction count 36, allocated bytes for code 146 (MethodHash=8938641f) for method RoundDisasm:Round(float,int):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:Round2(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 2 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T02] ( 4, 3.50) double -> mm0 single-def
; V01 OutArgs [V01 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 4, 7 ) double -> mm1 "Inlining Arg"
;* V03 tmp2 [V03 ] ( 0, 0 ) struct (16) zero-ref do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[double]>
; V04 tmp3 [V04,T04] ( 3, 1.50) double -> mm1 "Inline stloc first use temp"
; V05 tmp4 [V05,T00] ( 3, 3 ) struct (16) [rsp+0x28] do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[double]>
; V06 tmp5 [V06,T05] ( 2, 1 ) double -> mm0 "Inline return value spill temp"
;* V07 tmp6 [V07 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
; V08 tmp7 [V08,T03] ( 2, 2 ) double -> mm0 "Inlining Arg"
;* V09 tmp8 [V09 ] ( 0, 0 ) double -> zero-ref "Inline return value spill temp"
;* V10 tmp9 [V10 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 56
G_M8040_IG01: ;; offset=0x0000
4883EC38 sub rsp, 56
C5F877 vzeroupper
33C0 xor eax, eax
4889442428 mov qword ptr [rsp+0x28], rax
;; size=14 bbWeight=1 PerfScore 2.50
G_M8040_IG02: ;; offset=0x000E
C5F828C8 vmovaps xmm1, xmm0
C5F0541546000000 vandps xmm2, xmm1, xmmword ptr [reloc @RWD00]
C5FB101D4E000000 vmovsd xmm3, qword ptr [reloc @RWD16]
C5F92EDA vucomisd xmm3, xmm2
762F jbe SHORT G_M8040_IG04
;; size=26 bbWeight=1 PerfScore 8.25
G_M8040_IG03: ;; offset=0x0028
C744243010000000 mov dword ptr [rsp+0x30], 16
48B8206AB71AFB7F0000 mov rax, 0x7FFB1AB76A20 ; static handle
4889442428 mov bword ptr [rsp+0x28], rax
488B442428 mov rax, bword ptr [rsp+0x28]
C5FB104810 vmovsd xmm1, qword ptr [rax+0x10]
C5FB59C1 vmulsd xmm0, xmm0, xmm1
C4E3790BC004 vroundsd xmm0, xmm0, xmm0, 4
C5FB5EC9 vdivsd xmm1, xmm0, xmm1
;; size=47 bbWeight=0.50 PerfScore 14.62
G_M8040_IG04: ;; offset=0x0057
C5F828C1 vmovaps xmm0, xmm1
;; size=4 bbWeight=1 PerfScore 0.25
G_M8040_IG05: ;; offset=0x005B
4883C438 add rsp, 56
C3 ret
;; size=5 bbWeight=1 PerfScore 1.25
RWD00 dq 7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
RWD16 dq 4341C37937E08000h ; 1e+16
; Total bytes of code 96, prolog size 14, PerfScore 26.88, instruction count 20, allocated bytes for code 96 (MethodHash=166ce097) for method RoundDisasm:Round2(double):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:Round2(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 2 single block inlinees; 3 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T02] ( 4, 3.50) float -> mm0 single-def
; V01 OutArgs [V01 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 4, 7 ) float -> mm1 "Inlining Arg"
;* V03 tmp2 [V03 ] ( 0, 0 ) struct (16) zero-ref do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[float]>
; V04 tmp3 [V04,T04] ( 3, 1.50) float -> mm1 "Inline stloc first use temp"
; V05 tmp4 [V05,T00] ( 3, 3 ) struct (16) [rsp+0x28] do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[float]>
; V06 tmp5 [V06,T05] ( 2, 1 ) float -> mm0 "Inline return value spill temp"
;* V07 tmp6 [V07 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
; V08 tmp7 [V08,T03] ( 2, 2 ) float -> mm0 "Inlining Arg"
;* V09 tmp8 [V09 ] ( 0, 0 ) float -> zero-ref "Inline return value spill temp"
;* V10 tmp9 [V10 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 56
G_M52813_IG01: ;; offset=0x0000
4883EC38 sub rsp, 56
C5F877 vzeroupper
33C0 xor eax, eax
4889442428 mov qword ptr [rsp+0x28], rax
;; size=14 bbWeight=1 PerfScore 2.50
G_M52813_IG02: ;; offset=0x000E
C5F828C8 vmovaps xmm1, xmm0
C5F0541546000000 vandps xmm2, xmm1, xmmword ptr [reloc @RWD00]
C5FA101D4E000000 vmovss xmm3, dword ptr [reloc @RWD16]
C5F82EDA vucomiss xmm3, xmm2
762F jbe SHORT G_M52813_IG04
;; size=26 bbWeight=1 PerfScore 8.25
G_M52813_IG03: ;; offset=0x0028
C744243007000000 mov dword ptr [rsp+0x30], 7
48B868DCB71AFB7F0000 mov rax, 0x7FFB1AB7DC68 ; static handle
4889442428 mov bword ptr [rsp+0x28], rax
488B442428 mov rax, bword ptr [rsp+0x28]
C5FA104808 vmovss xmm1, dword ptr [rax+0x08]
C5FA59C1 vmulss xmm0, xmm0, xmm1
C4E3790AC004 vroundss xmm0, xmm0, xmm0, 4
C5FA5EC9 vdivss xmm1, xmm0, xmm1
;; size=47 bbWeight=0.50 PerfScore 13.62
G_M52813_IG04: ;; offset=0x0057
C5FA5AC1 vcvtss2sd xmm0, xmm0, xmm1
;; size=4 bbWeight=1 PerfScore 4.00
G_M52813_IG05: ;; offset=0x005B
4883C438 add rsp, 56
C3 ret
;; size=5 bbWeight=1 PerfScore 1.25
RWD00 dq 7FFFFFFF7FFFFFFFh, 7FFFFFFF7FFFFFFFh
RWD16 dd 4CBEBC20h ; 1e+08
; Total bytes of code 96, prolog size 14, PerfScore 29.62, instruction count 20, allocated bytes for code 96 (MethodHash=81b131b2) for method RoundDisasm:Round2(float):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:Round4(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 1 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T02] ( 4, 3.50) double -> mm0 single-def
; V01 OutArgs [V01 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 4, 7 ) double -> mm1 "Inlining Arg"
;* V03 tmp2 [V03 ] ( 0, 0 ) struct (16) zero-ref do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[double]>
; V04 tmp3 [V04,T04] ( 3, 1.50) double -> mm1 "Inline stloc first use temp"
; V05 tmp4 [V05,T00] ( 3, 3 ) struct (16) [rsp+0x28] do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[double]>
; V06 tmp5 [V06,T05] ( 2, 1 ) double -> mm0 "Inline return value spill temp"
;* V07 tmp6 [V07 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
; V08 tmp7 [V08,T03] ( 2, 2 ) double -> mm0 "Inlining Arg"
;* V09 tmp8 [V09 ] ( 0, 0 ) double -> zero-ref "Inline return value spill temp"
;* V10 tmp9 [V10 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 56
G_M22062_IG01: ;; offset=0x0000
4883EC38 sub rsp, 56
C5F877 vzeroupper
33C0 xor eax, eax
4889442428 mov qword ptr [rsp+0x28], rax
;; size=14 bbWeight=1 PerfScore 2.50
G_M22062_IG02: ;; offset=0x000E
C5F828C8 vmovaps xmm1, xmm0
C5F0541546000000 vandps xmm2, xmm1, xmmword ptr [reloc @RWD00]
C5FB101D4E000000 vmovsd xmm3, qword ptr [reloc @RWD16]
C5F92EDA vucomisd xmm3, xmm2
762F jbe SHORT G_M22062_IG04
;; size=26 bbWeight=1 PerfScore 8.25
G_M22062_IG03: ;; offset=0x0028
C744243010000000 mov dword ptr [rsp+0x30], 16
48B8206AB71AFB7F0000 mov rax, 0x7FFB1AB76A20 ; static handle
4889442428 mov bword ptr [rsp+0x28], rax
488B442428 mov rax, bword ptr [rsp+0x28]
C5FB104820 vmovsd xmm1, qword ptr [rax+0x20]
C5FB59C1 vmulsd xmm0, xmm0, xmm1
C4E3790BC00B vroundsd xmm0, xmm0, xmm0, 11
C5FB5EC9 vdivsd xmm1, xmm0, xmm1
;; size=47 bbWeight=0.50 PerfScore 14.62
G_M22062_IG04: ;; offset=0x0057
C5F828C1 vmovaps xmm0, xmm1
;; size=4 bbWeight=1 PerfScore 0.25
G_M22062_IG05: ;; offset=0x005B
4883C438 add rsp, 56
C3 ret
;; size=5 bbWeight=1 PerfScore 1.25
RWD00 dq 7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
RWD16 dq 4341C37937E08000h ; 1e+16
; Total bytes of code 96, prolog size 14, PerfScore 26.88, instruction count 20, allocated bytes for code 96 (MethodHash=b456a9d1) for method RoundDisasm:Round4(double):double (FullOpts)
; ============================================================
; Assembly listing for method RoundDisasm:Round4(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 3 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T02] ( 4, 3.50) float -> mm0 single-def
; V01 OutArgs [V01 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 4, 7 ) float -> mm1 "Inlining Arg"
;* V03 tmp2 [V03 ] ( 0, 0 ) struct (16) zero-ref do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[float]>
; V04 tmp3 [V04,T04] ( 3, 1.50) float -> mm1 "Inline stloc first use temp"
; V05 tmp4 [V05,T00] ( 3, 3 ) struct (16) [rsp+0x28] do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[float]>
; V06 tmp5 [V06,T05] ( 2, 1 ) float -> mm0 "Inline return value spill temp"
;* V07 tmp6 [V07 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
; V08 tmp7 [V08,T03] ( 2, 2 ) float -> mm0 "Inlining Arg"
;* V09 tmp8 [V09 ] ( 0, 0 ) float -> zero-ref "Inline return value spill temp"
;* V10 tmp9 [V10 ] ( 0, 0 ) ref -> zero-ref "argument with side effect"
;
; Lcl frame size = 56
G_M34635_IG01: ;; offset=0x0000
4883EC38 sub rsp, 56
C5F877 vzeroupper
33C0 xor eax, eax
4889442428 mov qword ptr [rsp+0x28], rax
;; size=14 bbWeight=1 PerfScore 2.50
G_M34635_IG02: ;; offset=0x000E
C5F828C8 vmovaps xmm1, xmm0
C5F0541546000000 vandps xmm2, xmm1, xmmword ptr [reloc @RWD00]
C5FA101D4E000000 vmovss xmm3, dword ptr [reloc @RWD16]
C5F82EDA vucomiss xmm3, xmm2
762F jbe SHORT G_M34635_IG04
;; size=26 bbWeight=1 PerfScore 8.25
G_M34635_IG03: ;; offset=0x0028
C744243007000000 mov dword ptr [rsp+0x30], 7
48B868DCB71AFB7F0000 mov rax, 0x7FFB1AB7DC68 ; static handle
4889442428 mov bword ptr [rsp+0x28], rax
488B442428 mov rax, bword ptr [rsp+0x28]
C5FA104810 vmovss xmm1, dword ptr [rax+0x10]
C5FA59C1 vmulss xmm0, xmm0, xmm1
C4E3790AC00B vroundss xmm0, xmm0, xmm0, 11
C5FA5EC9 vdivss xmm1, xmm0, xmm1
;; size=47 bbWeight=0.50 PerfScore 13.62
G_M34635_IG04: ;; offset=0x0057
C5FA5AC1 vcvtss2sd xmm0, xmm0, xmm1
;; size=4 bbWeight=1 PerfScore 4.00
G_M34635_IG05: ;; offset=0x005B
4883C438 add rsp, 56
C3 ret
;; size=5 bbWeight=1 PerfScore 1.25
RWD00 dq 7FFFFFFF7FFFFFFFh, 7FFFFFFF7FFFFFFFh
RWD16 dd 4CBEBC20h ; 1e+08
; Total bytes of code 96, prolog size 14, PerfScore 29.62, instruction count 20, allocated bytes for code 96 (MethodHash=cd4078b4) for method RoundDisasm:Round4(float):double (FullOpts)
; ============================================================
Fixes #98164.
|
tannergooding
approved these changes
Feb 9, 2024
Sign up for free
to subscribe to this conversation on GitHub.
Already have an account?
Sign in.
Labels
area-System.Numerics
community-contribution
Indicates that the PR has been added by a community member
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Simplifies and optimizes parameterized Round overloads.
Code:
.NET 8
This branch
Fixes #98164.