Index: lib/Target/PowerPC/PPC.h
===================================================================
--- lib/Target/PowerPC/PPC.h
+++ lib/Target/PowerPC/PPC.h
@@ -44,7 +44,7 @@
   FunctionPass *createPPCQPXLoadSplatPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCTLSDynamicCallPass();
-  FunctionPass *createPPCBoolRetToIntPass();
+  FunctionPass *createPPCBoolRetToIntPass(PPCTargetMachine *TM);
   FunctionPass *createPPCExpandISELPass();
   void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP, bool isDarwin);
Index: lib/Target/PowerPC/PPCBoolRetToInt.cpp
===================================================================
--- lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements converting i1 values to i32 if they could be more
+// This file implements converting i1 values to i32/i64 if they could be more
 // profitably allocated as GPRs rather than CRs. This pass will become totally
 // unnecessary if Register Bank Allocation and Global Instruction Selection ever
 // go upstream.
 //
-// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the
 // transitive closure of their uses includes only PHINodes, CallInsts, and
 // ReturnInsts. The rational is that arguments are generally passed and returned
-// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
+// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will
 // actually save casts at the Machine Instruction level.
 //
 // It might be useful to expand this pass to add bit-wise operations to the list
@@ -33,6 +33,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "PPCTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -87,17 +88,19 @@
     return Defs;
   }
 
-  // Translate a i1 value to an equivalent i32 value:
-  static Value *translate(Value *V) {
-    Type *Int32Ty = Type::getInt32Ty(V->getContext());
+  // Translate an i1 value to an equivalent i32 value, or i64 on PPC64:
+  Value *translate(Value *V) {
+    Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
+                                : Type::getInt32Ty(V->getContext());
+
     if (auto *C = dyn_cast<Constant>(V))
-      return ConstantExpr::getZExt(C, Int32Ty);
+      return ConstantExpr::getZExt(C, IntTy);
     if (auto *P = dyn_cast<PHINode>(V)) {
       // Temporarily set the operands to 0. We'll fix this later in
       // runOnUse.
-      Value *Zero = Constant::getNullValue(Int32Ty);
+      Value *Zero = Constant::getNullValue(IntTy);
       PHINode *Q =
-        PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+        PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P);
       for (unsigned i = 0; i < P->getNumOperands(); ++i)
         Q->addIncoming(Zero, P->getIncomingBlock(i));
       return Q;
@@ -109,7 +112,7 @@
 
     auto InstPt =
       A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
-    return new ZExtInst(V, Int32Ty, "", InstPt);
+    return new ZExtInst(V, IntTy, "", InstPt);
   }
 
   typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
@@ -177,7 +180,13 @@
  public:
   static char ID;
 
-  PPCBoolRetToInt() : FunctionPass(ID) {
+  PPCBoolRetToInt() : FunctionPass(ID), TM(nullptr) {
+    initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Constructs the pass for a given target machine; runOnFunction queries it
+  // for the subtarget of each function.
+  explicit PPCBoolRetToInt(TargetMachine *TM) : FunctionPass(ID), TM(TM) {
     initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
   }
 
@@ -185,6 +194,12 @@
     if (skipFunction(F))
       return false;
 
+    // The subtarget decides whether i1 values are widened to i32 or i64, so
+    // without a target machine the function is left untouched.
+    if (!TM)
+      return false;
+    ST = static_cast<PPCTargetMachine *>(TM)->getSubtargetImpl(F);
+
     PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
     B2IMap Bool2IntMap;
     bool Changed = false;
@@ -205,7 +220,7 @@
     return Changed;
   }
 
-  static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
-                       B2IMap &BoolToIntMap) {
+  bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+                B2IMap &BoolToIntMap) {
     auto Defs = findAllDefs(U);
 
@@ -262,13 +277,23 @@
     AU.addPreserved<DominatorTreeWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
   }
+
+private:
+  // Subtarget of the function being processed; assigned in runOnFunction and
+  // read by translate().
+  const PPCSubtarget *ST = nullptr;
+  // Target machine supplied at construction; null for a default-constructed
+  // pass, in which case runOnFunction leaves the function untouched.
+  TargetMachine *TM;
 };
 
 } // end anonymous namespace
 
 char PPCBoolRetToInt::ID = 0;
-INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
-                "Convert i1 constants to i32 if they are returned",
-                false, false)
-
-FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
+INITIALIZE_TM_PASS(PPCBoolRetToInt, "bool-ret-to-int",
+                   "Convert i1 constants to i32/i64 if they are returned",
+                   false, false)
+
+FunctionPass *llvm::createPPCBoolRetToIntPass(PPCTargetMachine *TM) {
+  return new PPCBoolRetToInt(TM);
+}
Index: lib/Target/PowerPC/PPCTargetMachine.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetMachine.cpp
+++ lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -323,7 +323,7 @@
 
 void PPCPassConfig::addIRPasses() {
   if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createPPCBoolRetToIntPass());
+    addPass(createPPCBoolRetToIntPass(&getPPCTargetMachine()));
   addPass(createAtomicExpandPass(&getPPCTargetMachine()));
 
   // For the BG/Q (or if explicitly requested), add explicit data prefetch
Index: test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
===================================================================
--- test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
+++ test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=32442
+; Don't generate zero extension for the return value.
+; CHECK-NOT: clrldi
+
+define zeroext i1 @foo(i32 signext %i, i32* %p) {
+entry:
+  %cmp = icmp eq i32 %i, 0
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  store i32 %i, i32* %p, align 4
+  br label %return
+
+return:
+  %retval = phi i1 [ true, %if.end ], [ false, %entry ]
+  ret i1 %retval
+}
+
Index: test/CodeGen/PowerPC/BoolRetToIntTest.ll
===================================================================
--- test/CodeGen/PowerPC/BoolRetToIntTest.ll
+++ test/CodeGen/PowerPC/BoolRetToIntTest.ll
@@ -31,14 +31,14 @@
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %cleanup.dest.slot.0
 }
@@ -78,14 +78,14 @@
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: call void %cont(i1 [[REG]]
   tail call void %cont(i1 %cleanup.dest.slot.0)
   ret void
@@ -112,17 +112,17 @@
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: call void %cont(i1 [[REG]]
   tail call void %cont(i1 %cleanup.dest.slot.0)
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %cleanup.dest.slot.0
 }
@@ -136,7 +136,7 @@
   br label %cleanup
 
 cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   %result = phi i1 [ false, %foo ], [ %operand, %entry ]
   ret i1 %result
@@ -186,7 +186,7 @@
 
 ; CHECK-LABEL: cleanup
 cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   %result = phi i1 [ %bar, %foo], [ %operand, %entry ]
   ret i1 %result
@@ -198,8 +198,8 @@
 define zeroext i1 @call_test() {
 ; CHECK: [[REG:%.+]] = call i1
   %result = call i1 @return_i1()
-; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i32
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i64
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %result
 }