From 7c1636e17604099516fe3ef5f6056e8ab49daf01 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 19 Apr 2016 21:38:23 +0000 Subject: [PATCH 1/6] Remove old PowerPC64 workaround for TOC issues I'm not seeing any TOC issues on recent LLVM versions, so remove the workaround. --- src/codegen.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 783ea574fc5e2..c7f34f8465709 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5698,11 +5698,7 @@ extern "C" void jl_init_codegen(void) cl::ParseEnvironmentOptions("Julia", "JULIA_LLVM_ARGS"); #endif -#if defined(_CPU_PPC_) || defined(_CPU_PPC64_) - imaging_mode = true; // LLVM seems to JIT bad TOC tables for the optimizations we attempt in non-imaging_mode -#else imaging_mode = jl_generating_output(); -#endif jl_init_debuginfo(); jl_init_runtime_ccall(); From 9b6e83f0f302f614fecb9c884026811bb3d64f85 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 10 May 2016 07:33:55 +0000 Subject: [PATCH 2/6] Remove PowerPC little endian disassembly workaround Disassembling little endian PowerPC instructions is now fixed upstream with LLVM commit r242288, so we can remove this workaround. --- src/disasm.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index 6ded32a556bbc..1ddd689194b7d 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -563,18 +563,7 @@ void jl_dump_asm_internal(uintptr_t Fptr, size_t Fsize, int64_t slide, MCInst Inst; MCDisassembler::DecodeStatus S; -#if defined(_CPU_PPC64_) && BYTE_ORDER == LITTLE_ENDIAN - // llvm doesn't know that POWER8 can have little-endian instruction order - unsigned char byte_swap_buf[4]; - assert(memoryObject.size() >= 4); - byte_swap_buf[3] = memoryObject[Index+0]; - byte_swap_buf[2] = memoryObject[Index+1]; - byte_swap_buf[1] = memoryObject[Index+2]; - byte_swap_buf[0] = memoryObject[Index+3]; - FuncMCView view = FuncMCView(byte_swap_buf, 4); -#else FuncMCView view = memoryObject.slice(Index); -#endif S = DisAsm->getInstruction(Inst, insSize, view, 0, /*REMOVE*/ nulls(), nulls()); switch (S) { From 0892089e3ae8fa2fe84a3816da8d67cadca3bbb9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 11 May 2016 22:37:02 +0000 Subject: [PATCH 3/6] Disable fast isel on PowerPC64 Fast isel is causing a number of test case failures on PowerPC64. Disable it until we solve the LLVM issues. --- src/codegen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index c7f34f8465709..e86d6988de3ff 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5797,7 +5797,7 @@ extern "C" void jl_init_codegen(void) targetFeatures); assert(jl_TargetMachine && "Failed to select target machine -" " Is the LLVM backend for this CPU enabled?"); -#if defined(USE_MCJIT) && !defined(_CPU_ARM_) +#if defined(USE_MCJIT) && (!defined(_CPU_ARM_) && !defined(_CPU_PPC64_)) // FastISel seems to be buggy for ARM. Ref #13321 if (jl_options.opt_level < 3) jl_TargetMachine->setFastISel(true); From 51455517b9d1d66dfd9dd86273ad1de7ac69020d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 19 Apr 2016 21:59:16 +0000 Subject: [PATCH 4/6] Add LLVM patch that fixes PowerPC64 JIT issue This fixes an issue when reusing a JIT context on PowerPC64. --- deps/Makefile | 1 + deps/llvm-D17712.patch | 52 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 deps/llvm-D17712.patch diff --git a/deps/Makefile b/deps/Makefile index f6d78a7d7cd5b..344b515a84b39 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -728,6 +728,7 @@ $(eval $(call LLVM_PATCH,llvm-3.8.0_winshlib)) $(eval $(call LLVM_PATCH,llvm-3.8.0_threads)) # fix replutil test on unix $(eval $(call LLVM_PATCH,llvm-D17165-D18583)) +$(eval $(call LLVM_PATCH,llvm-D17712)) endif # LLVM_VER ifeq ($(LLVM_VER),3.7.1) diff --git a/deps/llvm-D17712.patch b/deps/llvm-D17712.patch new file mode 100644 index 0000000000000..4488be41f3d36 --- /dev/null +++ b/deps/llvm-D17712.patch @@ -0,0 +1,52 @@ +From 6f20310e9eede9ab68fea1ffdfba9881b8b80684 Mon Sep 17 00:00:00 2001 +From: Nemanja Ivanovic +Date: Sat, 12 Mar 2016 10:23:07 +0000 +Subject: [PATCH] Fix for PR 26378 + +This patch corresponds to review: +http://reviews.llvm.org/D17712 + +We were not clearing the TOC vector in PPCAsmPrinter when initializing it. This +caused duplicate definition asserts when the pass is reused on the module +(i.e. with -compile-twice or in JIT contexts). + + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263338 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/PowerPC/PPCAsmPrinter.cpp | 6 ++++++ + test/CodeGen/PowerPC/pr26378.ll | 6 ++++++ + 2 files changed, 12 insertions(+) + create mode 100644 test/CodeGen/PowerPC/pr26378.ll + +diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp +index ec354c2..f206652 100644 +--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp ++++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp +@@ -82,6 +82,12 @@ public: + + MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); + ++ virtual bool doInitialization(Module &M) override { ++ if (!TOC.empty()) ++ TOC.clear(); ++ return AsmPrinter::doInitialization(M); ++ } ++ + void EmitInstruction(const MachineInstr *MI) override; + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); +diff --git a/test/CodeGen/PowerPC/pr26378.ll b/test/CodeGen/PowerPC/pr26378.ll +new file mode 100644 +index 0000000..e5e2055 +--- /dev/null ++++ b/test/CodeGen/PowerPC/pr26378.ll +@@ -0,0 +1,6 @@ ++; RUN: llc -compile-twice -filetype obj \ ++; RUN: -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 < %s ++@foo = common global i32 0, align 4 ++define i8* @blah() #0 { ++ ret i8* bitcast (i32* @foo to i8*) ++} +-- +2.7.4 + From 1b3f49e6eeb35e5e2918ad92b820a3eabf84d48e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 10 May 2016 10:46:58 +0000 Subject: [PATCH 5/6] Add LLVM patch that fixes some PowerPC64 little endian issues This fixes some little endian fast isel issues as well as setting a sane default for the generic target on little endian. --- deps/Makefile | 1 + deps/llvm-PR26180.patch | 93 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 deps/llvm-PR26180.patch diff --git a/deps/Makefile b/deps/Makefile index 344b515a84b39..5dafb4e1aed66 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -729,6 +729,7 @@ $(eval $(call LLVM_PATCH,llvm-3.8.0_threads)) # fix replutil test on unix $(eval $(call LLVM_PATCH,llvm-D17165-D18583)) $(eval $(call LLVM_PATCH,llvm-D17712)) +$(eval $(call LLVM_PATCH,llvm-PR26180)) endif # LLVM_VER ifeq ($(LLVM_VER),3.7.1) diff --git a/deps/llvm-PR26180.patch b/deps/llvm-PR26180.patch new file mode 100644 index 0000000000000..69c545fd40a71 --- /dev/null +++ b/deps/llvm-PR26180.patch @@ -0,0 +1,93 @@ +From 9411e53a301704bc48271641c0388b022006e0c7 Mon Sep 17 00:00:00 2001 +From: Nemanja Ivanovic +Date: Mon, 29 Feb 2016 16:42:27 +0000 +Subject: [PATCH] Fix for PR26180 + +Corresponds to Phabricator review: +http://reviews.llvm.org/D16592 + +This fix includes both an update to how we handle the "generic" CPU on LE +systems as well as Anton's fix for the Fast Isel issue. + + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@262233 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/PowerPC/PPCFastISel.cpp | 6 +++--- + lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++-- + lib/Target/PowerPC/PPCSubtarget.cpp | 2 +- + test/CodeGen/PowerPC/pr26180.ll | 14 ++++++++++++++ + 4 files changed, 20 insertions(+), 6 deletions(-) + create mode 100644 test/CodeGen/PowerPC/pr26180.ll + +diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp +index 3980ecf..1de2679 100644 +--- a/lib/Target/PowerPC/PPCFastISel.cpp ++++ b/lib/Target/PowerPC/PPCFastISel.cpp +@@ -1068,10 +1068,10 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, + if (!PPCEmitStore(MVT::f64, SrcReg, Addr)) + return 0; + +- // Reload it into a GPR. If we want an i32, modify the address +- // to have a 4-byte offset so we load from the right place. ++ // Reload it into a GPR. If we want an i32 on big endian, modify the ++ // address to have a 4-byte offset so we load from the right place. + if (VT == MVT::i32) +- Addr.Offset = 4; ++ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; + + // Look at the currently assigned register for this instruction + // to determine the required register class. +diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp +index 59086f1..3fe7cf3 100644 +--- a/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -6163,11 +6163,11 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, + MPI, false, false, 0); + + // Result is a load from the stack slot. If loading 4 bytes, make sure to +- // add in a bias. ++ // add in a bias on big endian. + if (Op.getValueType() == MVT::i32 && !i32Stack) { + FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, + DAG.getConstant(4, dl, FIPtr.getValueType())); +- MPI = MPI.getWithOffset(4); ++ MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); + } + + RLI.Chain = Chain; +diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp +index c357c75..359c2eb 100644 +--- a/lib/Target/PowerPC/PPCSubtarget.cpp ++++ b/lib/Target/PowerPC/PPCSubtarget.cpp +@@ -110,7 +110,7 @@ void PPCSubtarget::initializeEnvironment() { + void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { + // Determine default and user specified characteristics + std::string CPUName = CPU; +- if (CPUName.empty()) { ++ if (CPUName.empty() || CPU == "generic") { + // If cross-compiling with -march=ppc64le without -mcpu + if (TargetTriple.getArch() == Triple::ppc64le) + CPUName = "ppc64le"; +diff --git a/test/CodeGen/PowerPC/pr26180.ll b/test/CodeGen/PowerPC/pr26180.ll +new file mode 100644 +index 0000000..e4cbcb8 +--- /dev/null ++++ b/test/CodeGen/PowerPC/pr26180.ll +@@ -0,0 +1,14 @@ ++; RUN: llc -mcpu=generic -mtriple=powerpc64le-unknown-unknown -O0 < %s | FileCheck %s --check-prefix=GENERIC ++; RUN: llc -mcpu=ppc -mtriple=powerpc64le-unknown-unknown -O0 < %s | FileCheck %s ++ ++define i32 @bad(double %x) { ++ %1 = fptoui double %x to i32 ++ ret i32 %1 ++} ++ ++; CHECK: fctidz 1, 1 ++; CHECK: stfd 1, [[OFF:.*]](1) ++; CHECK: lwz {{[0-9]*}}, [[OFF]](1) ++; GENERIC: fctiwuz 1, 1 ++; GENERIC: stfd 1, [[OFF:.*]](1) ++; GENERIC: lwz {{[0-9]*}}, [[OFF]](1) +-- +2.7.4 + From d925dcd28f02426636bc0616215607c3ebb6d8b2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 11 May 2016 11:18:27 +0000 Subject: [PATCH 6/6] Add LLVM patch that fixes verifier issue on PowerPC64 --- deps/Makefile | 1 + deps/llvm-PR27046.patch | 70 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 deps/llvm-PR27046.patch diff --git a/deps/Makefile b/deps/Makefile index 5dafb4e1aed66..d6e65ff52ba81 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -730,6 +730,7 @@ $(eval $(call LLVM_PATCH,llvm-3.8.0_threads)) $(eval $(call LLVM_PATCH,llvm-D17165-D18583)) $(eval $(call LLVM_PATCH,llvm-D17712)) $(eval $(call LLVM_PATCH,llvm-PR26180)) +$(eval $(call LLVM_PATCH,llvm-PR27046)) endif # LLVM_VER ifeq ($(LLVM_VER),3.7.1) diff --git a/deps/llvm-PR27046.patch b/deps/llvm-PR27046.patch new file mode 100644 index 0000000000000..f59556ffd0bbb --- /dev/null +++ b/deps/llvm-PR27046.patch @@ -0,0 +1,70 @@ +From 18569e85b3d63024d522b732f3a5694eda5f9110 Mon Sep 17 00:00:00 2001 +From: Chuang-Yu Cheng +Date: Tue, 12 Apr 2016 03:10:52 +0000 +Subject: [PATCH] [PPC64] Mark CR0 Live if PPCInstrInfo::optimizeCompareInstr + Creates a Use of CR0 + +Resolve Bug 27046 (https://llvm.org/bugs/show_bug.cgi?id=27046). +The PPCInstrInfo::optimizeCompareInstr function could create a new use of +CR0, even if CR0 were previously dead. This patch marks CR0 live if a use of +CR0 is created. + +Author: Tom Jablin (tjablin) +Reviewers: hfinkel kbarton cycheng + +http://reviews.llvm.org/D18884 + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266040 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/PowerPC/PPCInstrInfo.cpp | 4 ++++ + test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll | 23 +++++++++++++++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll + +diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp +index 76f97b1..b6dc00f 100644 +--- a/lib/Target/PowerPC/PPCInstrInfo.cpp ++++ b/lib/Target/PowerPC/PPCInstrInfo.cpp +@@ -1763,6 +1763,10 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, + get(TargetOpcode::COPY), CRReg) + .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0); + ++ // Even if CR0 register were dead before, it is alive now since the ++ // instruction we just built uses it. ++ MI->clearRegisterDeads(PPC::CR0); ++ + if (MIOpC != NewOpC) { + // We need to be careful here: we're replacing one instruction with + // another, and we need to make sure that we get all of the right +diff --git a/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll b/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll +new file mode 100644 +index 0000000..3c38209 +--- /dev/null ++++ b/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll +@@ -0,0 +1,23 @@ ++; RUN: llc -print-before=peephole-opts -print-after=peephole-opts -mtriple=powerpc64-unknown-linux-gnu -o /dev/null 2>&1 < %s | FileCheck %s ++ ++define signext i32 @fn1(i32 %baz) { ++ %1 = mul nsw i32 %baz, 208 ++ %2 = zext i32 %1 to i64 ++ %3 = shl i64 %2, 48 ++ %4 = ashr exact i64 %3, 48 ++; CHECK: ANDIo8 {{[^,]+}}, 65520, %CR0; ++; CHECK: CMPLDI ++; CHECK: BCC ++ ++; CHECK: ANDIo8 {{[^,]+}}, 65520, %CR0; ++; CHECK: COPY %CR0 ++; CHECK: BCC ++ %5 = icmp eq i64 %4, 0 ++ br i1 %5, label %foo, label %bar ++ ++foo: ++ ret i32 1 ++ ++bar: ++ ret i32 0 ++} +-- +2.7.4 +