[PowerPC] Disable shrink-wrapping when MovePCtoLR8 is emitted in the entry block.

On the 64-bit path, instruction selection materialises the global base
register by inserting MovePCtoLR8 + MFLR8 into the first basic block. That
sequence clobbers LR and must be dominated by the prologue, but PEI only
discovers this after shrink-wrapping has already run. This patch records the
fact in PPCFunctionInfo (ShrinkWrapDisabled) at ISel time and makes
PPCFrameLowering::enableShrinkWrapping() return false when the flag is set.
A regression test (pr33547.ll) checks that LR is saved before the "bl" that
implements MovePCtoLR8.

Index: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2159,6 +2159,8 @@
 }
 
 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
+    return false;
   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
           MF.getSubtarget<PPCSubtarget>().isPPC64());
 }
Index: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -417,6 +417,16 @@
         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
       }
     } else {
+      // We must ensure that this sequence is dominated by the prologue.
+      // FIXME: This is a bit of a big hammer since we don't get the benefits
+      // of shrink-wrapping whenever we emit this instruction. Considering
+      // this is used in any function where we emit a jump table, this may be
+      // a significant limitation. We should consider inserting this in the
+      // block where it is used and then commoning this sequence up if it
+      // appears in multiple places.
+      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
+      // MovePCtoLR8.
+      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
Index: llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -45,6 +45,11 @@
   /// PEI.
   bool MustSaveLR;
 
+  /// Do we have to disable shrink-wrapping? This has to be set if we emit any
+  /// instructions that clobber LR in the entry block because discovering this
+  /// in PEI is too late (happens after shrink-wrapping).
+  bool ShrinkWrapDisabled = false;
+
   /// Does this function have any stack spills.
   bool HasSpills = false;
 
@@ -147,6 +152,12 @@
   void setMustSaveLR(bool U) { MustSaveLR = U; }
   bool mustSaveLR() const    { return MustSaveLR; }
 
+  /// We certainly don't want to shrink wrap functions if we've emitted a
+  /// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
+  /// has to go into the entry block.
+  void setShrinkWrapDisabled(bool U) { ShrinkWrapDisabled = U; }
+  bool shrinkWrapDisabled() const { return ShrinkWrapDisabled; }
+
   void setHasSpills()      { HasSpills = true; }
   bool hasSpills() const   { return HasSpills; }
 
Index: llvm/trunk/test/CodeGen/PowerPC/pr33547.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/pr33547.ll
+++ llvm/trunk/test/CodeGen/PowerPC/pr33547.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -mcpu=pwr8 -code-model=large < %s | FileCheck %s
+%struct.STATICS1 = type <{ [128 x i8] }>
+
+@.STATICS1 = internal global %struct.STATICS1 <{ [128 x i8] c"\09\00\00\00\03\00\00\00\05\00\00\00\04\00\00\00\0A\00\00\00\0A\00\00\00\0B\00\00\00\0A\08\AF/\B8\B6\87\04 \A1\07\00\08\9D\00\00\09\00\00\00\05\00\00\00\03\00\00\00\03\00\00\00\05\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" }>, align 16
+@.C302_MAIN_ = internal constant i32 4
+
+; Function Attrs: noinline norecurse nounwind
+define void @main() {
+L.entry:
+  tail call void @testFunc(i64* bitcast (i8* getelementptr inbounds (%struct.STATICS1, %struct.STATICS1* @.STATICS1, i64 0, i32 0, i64 124) to i64*), i64* bitcast (i32* @.C302_MAIN_ to i64*))
+  ret void
+}
+
+; Function Attrs: noinline norecurse nounwind readonly
+define signext i32 @ifunc_(i64* nocapture readonly %i) {
+; CHECK-LABEL: ifunc_:
+; CHECK:       # %bb.0: # %L.entry
+; CHECK-NEXT:    lwa 3, 0(3)
+; CHECK-NEXT:    blr
+L.entry:
+  %0 = bitcast i64* %i to i32*
+  %1 = load i32, i32* %0, align 4
+  ret i32 %1
+}
+
+; Function Attrs: noinline norecurse nounwind
+define void @testFunc(i64* nocapture %r, i64* nocapture readonly %k) {
+; CHECK-LABEL: testFunc
+; CHECK:         mflr 0
+; CHECK:         std 0, 16(1)
+; CHECK:         bl .[[BRANCHNEXT:[L0-9\$a-z]+]]
+; CHECK-NEXT:  [[BRANCHNEXT]]
+L.entry:
+  %0 = bitcast i64* %k to i32*
+  %1 = load i32, i32* %0, align 4
+  switch i32 %1, label %L.LB3_307 [
+    i32 1, label %L.LB3_307.sink.split
+    i32 3, label %L.LB3_307.sink.split
+    i32 4, label %L.LB3_321.split
+    i32 5, label %L.LB3_307.sink.split
+    i32 6, label %infloop.preheader
+    i32 2, label %infloop11.preheader
+  ]
+
+infloop11.preheader:                              ; preds = %L.entry
+  br label %infloop11
+
+infloop.preheader:                                ; preds = %L.entry
+  br label %infloop
+
+L.LB3_321.split:                                  ; preds = %L.entry
+  br label %L.LB3_307.sink.split
+
+L.LB3_307.sink.split:                             ; preds = %L.LB3_321.split, %L.entry, %L.entry, %L.entry
+  %.sink = phi i32 [ 5, %L.LB3_321.split ], [ -3, %L.entry ], [ -3, %L.entry ], [ -3, %L.entry ]
+  %2 = bitcast i64* %r to i32*
+  store i32 %.sink, i32* %2, align 4
+  br label %L.LB3_307
+
+L.LB3_307:                                        ; preds = %L.LB3_307.sink.split, %L.entry
+  ret void
+
+infloop:                                          ; preds = %infloop.preheader, %infloop
+  br label %infloop
+
+infloop11:                                        ; preds = %infloop11.preheader, %infloop11
+  br label %infloop11
+}