Skip to content

Commit

Permalink
[MCA][LSUnit] Correctly update the internal group flags on store barr…
Browse files Browse the repository at this point in the history
…ier execution. Fixes PR48024.

This is likely to be a regression introduced by my last refactoring of the
LSUnit (commit 6410676). Before this patch, the
"CurrentStoreBarrierGroupID" index was not correctly reset on store barrier
executions.  This was leading to unexpected crashes like the one reported as
PR48024.

(cherry picked from commit 7bf58a9)
  • Loading branch information
adibiagio authored and tstellar committed Nov 21, 2020
1 parent e95dfb6 commit 02d599f
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 0 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/MCA/HardwareUnits/LSUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ void LSUnit::onInstructionExecuted(const InstRef &IR) {
CurrentStoreGroupID = 0;
if (GroupID == CurrentLoadBarrierGroupID)
CurrentLoadBarrierGroupID = 0;
if (GroupID == CurrentStoreBarrierGroupID)
CurrentStoreBarrierGroupID = 0;
}
}

Expand Down
104 changes: 104 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/stmxcsr-ldmxcsr.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 < %s | FileCheck %s

# Code snippet taken from PR48024.

# Test input: save MXCSR to the stack, mask the saved value, store the result
# to a second stack slot, and reload MXCSR from that slot.
# stmxcsr is reported below as MayStore + HasSideEffects, and ldmxcsr as
# MayLoad + HasSideEffects; this is the sequence that crashed llvm-mca in
# PR48024.
stmxcsr -4(%rsp)
# -24577 == ~0x6000, so the following AND clears bits 13-14 of the saved value
# (the MXCSR rounding-control field per the Intel SDM).
movl $-24577, %eax # imm = 0x9FFF
andl -4(%rsp), %eax
movl %eax, -8(%rsp)
ldmxcsr -8(%rsp)
retq

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 704
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.85
# CHECK-NEXT: IPC: 0.85
# CHECK-NEXT: Block RThroughput: 3.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 * U stmxcsr -4(%rsp)
# CHECK-NEXT: 1 1 0.50 movl $-24577, %eax
# CHECK-NEXT: 1 4 1.00 * andl -4(%rsp), %eax
# CHECK-NEXT: 1 1 1.00 * movl %eax, -8(%rsp)
# CHECK-NEXT: 1 3 1.00 * U ldmxcsr -8(%rsp)
# CHECK-NEXT: 1 4 1.00 U retq

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 1.50 1.50 - - - - - 3.00 - 2.00 - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - stmxcsr -4(%rsp)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movl $-24577, %eax
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - andl -4(%rsp), %eax
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - movl %eax, -8(%rsp)
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - ldmxcsr -8(%rsp)
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - retq

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234

# CHECK: [0,0] DeER . . . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [0,1] DeER . . . . . movl $-24577, %eax
# CHECK-NEXT: [0,2] .DeeeeER . . . . andl -4(%rsp), %eax
# CHECK-NEXT: [0,3] .D====eER . . . . movl %eax, -8(%rsp)
# CHECK-NEXT: [0,4] . D===eeeER . . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [0,5] . DeeeeE--R . . . retq
# CHECK-NEXT: [1,0] . D===eE--R . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [1,1] . DeE-----R . . . movl $-24577, %eax
# CHECK-NEXT: [1,2] . D====eeeeER. . . andl -4(%rsp), %eax
# CHECK-NEXT: [1,3] . D========eER . . movl %eax, -8(%rsp)
# CHECK-NEXT: [1,4] . D=======eeeER . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [1,5] . D=eeeeE-----R . . retq
# CHECK-NEXT: [2,0] . .D=======eE--R . . stmxcsr -4(%rsp)
# CHECK-NEXT: [2,1] . .DeE---------R . . movl $-24577, %eax
# CHECK-NEXT: [2,2] . . D========eeeeER . andl -4(%rsp), %eax
# CHECK-NEXT: [2,3] . . D============eER . movl %eax, -8(%rsp)
# CHECK-NEXT: [2,4] . . D===========eeeER ldmxcsr -8(%rsp)
# CHECK-NEXT: [2,5] . . D=eeeeE---------R retq

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 4.3 1.0 1.3 stmxcsr -4(%rsp)
# CHECK-NEXT: 1. 3 1.0 1.0 4.7 movl $-24577, %eax
# CHECK-NEXT: 2. 3 5.0 0.3 0.0 andl -4(%rsp), %eax
# CHECK-NEXT: 3. 3 9.0 0.0 0.0 movl %eax, -8(%rsp)
# CHECK-NEXT: 4. 3 8.0 0.0 0.0 ldmxcsr -8(%rsp)
# CHECK-NEXT: 5. 3 1.7 1.7 5.3 retq
# CHECK-NEXT: 3 4.8 0.7 1.9 <total>
100 changes: 100 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Haswell/stmxcsr-ldmxcsr.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -timeline-max-iterations=3 < %s | FileCheck %s

# Code snippet taken from PR48024.

# Test input: save MXCSR to the stack, mask the saved value, store the result
# to a second stack slot, and reload MXCSR from that slot.
# stmxcsr is reported below as MayStore + HasSideEffects, and ldmxcsr as
# MayLoad + HasSideEffects; this is the sequence that crashed llvm-mca in
# PR48024.
stmxcsr -4(%rsp)
# -24577 == ~0x6000, so the following AND clears bits 13-14 of the saved value
# (the MXCSR rounding-control field per the Intel SDM).
movl $-24577, %eax # imm = 0x9FFF
andl -4(%rsp), %eax
movl %eax, -8(%rsp)
ldmxcsr -8(%rsp)
retq

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 1304
# CHECK-NEXT: Total uOps: 1300

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 0.46
# CHECK-NEXT: Block RThroughput: 3.3

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 3 2 1.00 * U stmxcsr -4(%rsp)
# CHECK-NEXT: 1 1 0.25 movl $-24577, %eax
# CHECK-NEXT: 2 6 0.50 * andl -4(%rsp), %eax
# CHECK-NEXT: 1 1 1.00 * movl %eax, -8(%rsp)
# CHECK-NEXT: 3 7 1.00 * U ldmxcsr -8(%rsp)
# CHECK-NEXT: 3 7 1.00 U retq

# CHECK: Resources:
# CHECK-NEXT: [0] - HWDivider
# CHECK-NEXT: [1] - HWFPDivider
# CHECK-NEXT: [2] - HWPort0
# CHECK-NEXT: [3] - HWPort1
# CHECK-NEXT: [4] - HWPort2
# CHECK-NEXT: [5] - HWPort3
# CHECK-NEXT: [6] - HWPort4
# CHECK-NEXT: [7] - HWPort5
# CHECK-NEXT: [8] - HWPort6
# CHECK-NEXT: [9] - HWPort7

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 1.75 1.74 1.67 1.68 2.00 1.75 1.76 1.65

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - - - 0.30 - 1.00 1.00 - 0.70 stmxcsr -4(%rsp)
# CHECK-NEXT: - - 0.08 0.67 - - - 0.04 0.21 - movl $-24577, %eax
# CHECK-NEXT: - - 0.42 0.37 0.35 0.65 - 0.01 0.20 - andl -4(%rsp), %eax
# CHECK-NEXT: - - - - 0.05 - 1.00 - - 0.95 movl %eax, -8(%rsp)
# CHECK-NEXT: - - 1.00 0.23 0.34 0.66 - 0.42 0.35 - ldmxcsr -8(%rsp)
# CHECK-NEXT: - - 0.25 0.47 0.63 0.37 - 0.28 1.00 - retq

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 012

# CHECK: [0,0] DeeER. . . . . . . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [0,1] DeE-R. . . . . . . . . movl $-24577, %eax
# CHECK-NEXT: [0,2] .DeeeeeeER. . . . . . . . andl -4(%rsp), %eax
# CHECK-NEXT: [0,3] .D======eER . . . . . . . movl %eax, -8(%rsp)
# CHECK-NEXT: [0,4] . D=====eeeeeeeER . . . . . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [0,5] . DeeeeeeeE----R . . . . . . retq
# CHECK-NEXT: [1,0] . D====eeE----R . . . . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [1,1] . DeE---------R . . . . . . movl $-24577, %eax
# CHECK-NEXT: [1,2] . D=========eeeeeeER . . . . . andl -4(%rsp), %eax
# CHECK-NEXT: [1,3] . D===============eER . . . . . movl %eax, -8(%rsp)
# CHECK-NEXT: [1,4] . .D==============eeeeeeeER. . . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [1,5] . . DeeeeeeeE-------------R. . . . retq
# CHECK-NEXT: [2,0] . . D=============eeE----R. . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [2,1] . . DeE------------------R. . . . movl $-24577, %eax
# CHECK-NEXT: [2,2] . . D==================eeeeeeER . . andl -4(%rsp), %eax
# CHECK-NEXT: [2,3] . . D========================eER . . movl %eax, -8(%rsp)
# CHECK-NEXT: [2,4] . . D=======================eeeeeeeER ldmxcsr -8(%rsp)
# CHECK-NEXT: [2,5] . . .DeeeeeeeE----------------------R retq

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 6.7 1.0 2.7 stmxcsr -4(%rsp)
# CHECK-NEXT: 1. 3 1.0 1.0 9.3 movl $-24577, %eax
# CHECK-NEXT: 2. 3 10.0 0.3 0.0 andl -4(%rsp), %eax
# CHECK-NEXT: 3. 3 16.0 0.0 0.0 movl %eax, -8(%rsp)
# CHECK-NEXT: 4. 3 15.0 0.0 0.0 ldmxcsr -8(%rsp)
# CHECK-NEXT: 5. 3 1.0 1.0 13.0 retq
# CHECK-NEXT: 3 8.3 0.6 4.2 <total>

0 comments on commit 02d599f

Please sign in to comment.