Skip to content

Commit

Permalink
Merge pull request #3756 from Sonicadvance1/fix_vmovhlps
Browse files Browse the repository at this point in the history
Fix VMOVHLPS instruction
  • Loading branch information
Sonicadvance1 authored Jun 25, 2024
2 parents 7ff9622 + 3e8d780 commit 3a310b8
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 17 deletions.
19 changes: 14 additions & 5 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -748,17 +748,26 @@ void OpDispatchBuilder::AVX128_MOVQ(OpcodeArgs) {
void OpDispatchBuilder::AVX128_VMOVLP(OpcodeArgs) {
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

if (Op->Dest.IsGPR()) {
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);

if (!Op->Dest.IsGPR()) {
///< VMOVLPS/PD mem64, xmm1
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src1.Low, OpSize::i64Bit, OpSize::i64Bit);
} else if (!Op->Src[1].IsGPR()) {
///< VMOVLPS/PD xmm1, xmm2, mem64
// Bits[63:0] come from Src2[63:0]
// Bits[127:64] come from Src1[127:64]
Ref Result_Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src2.Low, Src1.Low);
auto Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], OpSize::i64Bit, Op->Flags);
Ref Result_Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src2, Src1.Low);
Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);

AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Result_Low, .High = ZeroVector});
} else {
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src1.Low, OpSize::i64Bit, OpSize::i64Bit);
///< VMOVHLPS/PD xmm1, xmm2, xmm3
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);

Ref Result_Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Src1.Low, Src2.Low);
Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);

AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Result_Low, .High = ZeroVector});
}
}

Expand Down
20 changes: 14 additions & 6 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,23 @@ void OpDispatchBuilder::MOVLPOp(OpcodeArgs) {
}

void OpDispatchBuilder::VMOVLPOp(OpcodeArgs) {
if (Op->Dest.IsGPR()) {
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16});
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16});

if (!Op->Dest.IsGPR()) {
///< VMOVLPS/PD mem64, xmm1
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src1, 8, 8);
} else if (!Op->Src[1].IsGPR()) {
///< VMOVLPS/PD xmm1, xmm2, mem64
// Bits[63:0] come from Src2[63:0]
// Bits[127:64] come from Src1[127:64]
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 8});
Ref Result = _VInsElement(16, 8, 0, 0, Src1, Src2);

Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src2, Src1);
StoreResult(FPRClass, Op, Result, -1);
} else {
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8});
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, 8, 8);
///< VMOVHLPS/PD xmm1, xmm2, xmm3
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 16});
Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Src1, Src2);
StoreResult(FPRClass, Op, Result, -1);
}
}

Expand Down
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> VEXTableOps = []() consteval {
{OPD(1, 0b10, 0x11), 1, X86InstInfo{"VMOVSS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b11, 0x11), 1, X86InstInfo{"VMOVSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},

{OPD(1, 0b00, 0x12), 1, X86InstInfo{"VMOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b01, 0x12), 1, X86InstInfo{"VMOVLPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b00, 0x12), 1, X86InstInfo{"VMOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b01, 0x12), 1, X86InstInfo{"VMOVLPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b10, 0x12), 1, X86InstInfo{"VMOVSLDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b11, 0x12), 1, X86InstInfo{"VMOVDDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},

Expand Down
34 changes: 34 additions & 0 deletions unittests/ASM/VEX/vmovhlps.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0x4150f0e342241b6c", "0xdddddddddddddddd", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
"XMM3": ["0x4150f0e342241b6c", "0xdddddddddddddddd", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"],
"XMM6": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"]
}
}
%endif

; Test body: exercises the VEX-encoded, register-to-register VMOVHLPS form.
; ymm1 is preloaded with all 0xFF bytes; the expected result zeroes its upper
; 128 bits, so a missed write to any lane of the destination is detectable.
; Load inputs
vmovapd ymm1, [rel .data]
; ymm2 = 0xCC.. | 0xDD.. | 0xEE.. | 0xFF.. (one distinct byte pattern per 64-bit lane)
vmovapd ymm2, [rel .data + 32]
; ymm5 and ymm6 both receive the first 32 bytes of .data_random.
; ymm6 is never used as an operand below; RegData expects it to still equal ymm5.
vmovapd ymm5, [rel .data_random]
vmovapd ymm6, [rel .data_random]

; Expected: xmm1[63:0]   = xmm5[127:64]
;           xmm1[127:64] = xmm2[127:64] (0xDD..)
;           ymm1[255:128] = 0
vmovhlps xmm1, xmm2, xmm5
; Consumes the xmm1 result produced above as the first source:
;           xmm3[63:0]   = xmm5[127:64]
;           xmm3[127:64] = xmm1[127:64] (0xDD.. carried over from the previous result)
;           ymm3[255:128] = 0
vmovhlps xmm3, xmm1, xmm5

; End of test; presumably the harness compares the register state against
; RegData once execution stops here -- confirm against the test runner.
hlt

; The test body reads this table with vmovapd (aligned 256-bit loads), hence
; the 32-byte alignment.
align 32
.data:
; Bytes 0-31: initial value of ymm1 (all bits set).
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
; Bytes 32-63: initial value of ymm2; each 64-bit lane has its own byte pattern
; (0xCC, 0xDD, 0xEE, 0xFF) so the source lane of a result can be identified.
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD
db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF

; Single-precision constants used as the ymm5/ymm6 inputs. The test body only
; loads the first 32 bytes (the first eight values); the remaining values are
; not referenced by this test.
.data_random:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303

8 changes: 4 additions & 4 deletions unittests/InstructionCountCI/VEX_map1.json
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@
],
"ExpectedArm64ASM": [
"ldr q2, [x4]",
"mov v16.16b, v17.16b",
"mov v16.d[0], v2.d[0]"
"mov v16.16b, v2.16b",
"mov v16.d[1], v17.d[1]"
]
},
"vmovlpd xmm0, xmm1, [rax]": {
Expand All @@ -228,8 +228,8 @@
],
"ExpectedArm64ASM": [
"ldr q2, [x4]",
"mov v16.16b, v17.16b",
"mov v16.d[0], v2.d[0]"
"mov v16.16b, v2.16b",
"mov v16.d[1], v17.d[1]"
]
},
"vmovsldup xmm0, [rax]": {
Expand Down

0 comments on commit 3a310b8

Please sign in to comment.