Index: lib/Target/PowerPC/CMakeLists.txt =================================================================== --- lib/Target/PowerPC/CMakeLists.txt +++ lib/Target/PowerPC/CMakeLists.txt @@ -13,6 +13,7 @@ add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen + PPCBoolRetToInt.cpp PPCAsmPrinter.cpp PPCBranchSelector.cpp PPCCTRLoops.cpp Index: lib/Target/PowerPC/PPC.h =================================================================== --- lib/Target/PowerPC/PPC.h +++ lib/Target/PowerPC/PPC.h @@ -44,10 +44,12 @@ FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCTLSDynamicCallPass(); + FunctionPass *createPPCBoolRetToIntPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); void initializePPCVSXFMAMutatePass(PassRegistry&); + void initializePPCBoolRetToIntPass(PassRegistry&); extern char &PPCVSXFMAMutateID; namespace PPCII { Index: lib/Target/PowerPC/PPCBoolRetToInt.cpp =================================================================== --- lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -0,0 +1,253 @@ +//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements converting i1 values to i32 if they could be more +// profitably allocated as GPRs rather than CRs. This pass will become totally +// unnecessary if Register Bank Allocation and Global Instruction Selection ever +// go upstream. +// +// Presently, the pass converts i1 Constants, and Arguments to i32 if the +// transitive closure of their uses includes only PHINodes, CallInsts, and +// ReturnInsts. The rational is that arguments are generally passed and returned +// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will +// actually save casts at the Machine Instruction level. +// +// It might be useful to expand this pass to add bit-wise operations to the list +// of safe transitive closure types. Also, we miss some opportunities when LLVM +// represents logical AND and OR operations with control flow rather than data +// flow. For example by lowering the expression: return (A && B && C) +// +// as: return A ? true : B && C. +// +// There's code in SimplifyCFG that code be used to turn control flow in data +// flow using SelectInsts. Selects are slow on some architectures (P7/P8), so +// this probably isn't good in general, but for the special case of i1, the +// Selects could be further lowered to bit operations that are fast everywhere. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +#define DEBUG_TYPE "bool-ret-to-int" + +STATISTIC(NumBoolRetPromotion, + "Number of times a bool feeding a RetInst was promoted to an int"); +STATISTIC(NumBoolCallPromotion, + "Number of times a bool feeding a CallInst was promoted to an int"); +STATISTIC(NumBoolToIntPromotion, + "Total number of times a bool was promoted to an int"); + +class PPCBoolRetToInt : public FunctionPass { + + static SmallPtrSet findAllDefs(Value *V) { + SmallPtrSet Defs; + SmallVector WorkList; + WorkList.push_back(V); + Defs.insert(V); + while (!WorkList.empty()) { + Value *Curr = WorkList.back(); + WorkList.pop_back(); + if (User *CurrUser = dyn_cast(Curr)) + for (auto &Op : CurrUser->operands()) + if (Defs.insert(Op).second) + WorkList.push_back(Op); + } + return Defs; + } + + // Translate a i1 value to an equivalent i32 value: + static Value *translate(Value *V) { + Type *Int32Ty = Type::getInt32Ty(V->getContext()); + if (Constant *C = dyn_cast(V)) + return ConstantExpr::getZExt(C, Int32Ty); + if (PHINode *P = dyn_cast(V)) { + // Temporarily set the operands to 0. We'll fix this later in + // runOnUse. + Value *Zero = Constant::getNullValue(Int32Ty); + PHINode *Q = + PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + for (unsigned i = 0; i < P->getNumOperands(); ++i) + Q->addIncoming(Zero, P->getIncomingBlock(i)); + return Q; + } + + Argument *A = dyn_cast(V); + Instruction *I = dyn_cast(V); + assert((A || I) && "Unknown value type"); + + auto InstPt = + A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode(); + return new ZExtInst(V, Int32Ty, "", InstPt); + } + + typedef SmallPtrSet PHINodeSet; + + // A PHINode is Promotable if: + // 1. Its type is i1 AND + // 2. All of its uses are ReturnInt, CallInst, PHINode, or DbgInfoIntrinsic + // AND + // 3. All of its operands are Constant or Argument or + // CallInst or PHINode AND + // 4. All of its PHINode uses are Promotable AND + // 5. All of its PHINode operands are Promotable + static PHINodeSet getPromotablePHINodes(const Function &F) { + PHINodeSet Promotable; + // Condition 1 + for (auto &BB : F) + for (auto &I : BB) + if (const PHINode *P = dyn_cast(&I)) + if (P->getType()->isIntegerTy(1)) + Promotable.insert(P); + + SmallVector ToRemove; + for (const auto &P : Promotable) { + // Condition 2 and 3 + auto IsValidUser = [] (const Value *V) -> bool { + return isa(V) || isa(V) || isa(V) || + isa(V); + }; + auto IsValidOperand = [] (const Value *V) -> bool { + return isa(V) || isa(V) || isa(V) || + isa(V); + }; + const auto &Users = P->users(); + const auto &Operands = P->operands(); + if (!std::all_of(Users.begin(), Users.end(), IsValidUser) || + !std::all_of(Operands.begin(), Operands.end(), IsValidOperand)) + ToRemove.push_back(P); + } + + // Iterate to convergence + auto IsPromotable = [&Promotable] (const Value *V) -> bool { + const PHINode *Phi = dyn_cast(V); + return !Phi || Promotable.count(Phi); + }; + while (!ToRemove.empty()) { + for (auto &User : ToRemove) + Promotable.erase(User); + ToRemove.clear(); + + for (const auto &P : Promotable) { + // Condition 4 and 5 + const auto &Users = P->users(); + const auto &Operands = P->operands(); + if (!std::all_of(Users.begin(), Users.end(), IsPromotable) || + !std::all_of(Operands.begin(), Operands.end(), IsPromotable)) + ToRemove.push_back(P); + } + } + + return Promotable; + } + + typedef DenseMap B2IMap; + + public: + static char ID; + PPCBoolRetToInt() : FunctionPass(ID) { + initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) { + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); + B2IMap Bool2IntMap; + bool Changed = false; + for (auto &BB : F) { + for (auto &I : BB) { + if (ReturnInst *R = dyn_cast(&I)) + if (F.getReturnType()->isIntegerTy(1)) + Changed |= + runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap); + + if (CallInst *CI = dyn_cast(&I)) + for (auto &U : CI->operands()) + if (U->getType()->isIntegerTy(1)) + Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap); + } + } + + return Changed; + } + + static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, + B2IMap &BoolToIntMap) { + auto Defs = findAllDefs(U); + + // If the values are all Constants or Arguments, don't bother + if (!std::any_of(Defs.begin(), Defs.end(), isa)) + return false; + + // Presently, we only know how to handle PHINode, Constant, and Arguments. + // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension + // could also be handled in the future. + for (const auto &V : Defs) + if (!isa(V) && !isa(V) && !isa(V)) + return false; + + for (const auto &V : Defs) + if (const PHINode *P = dyn_cast(V)) + if (!PromotablePHINodes.count(P)) + return false; + + if (isa(U.getUser())) + ++NumBoolRetPromotion; + if (isa(U.getUser())) + ++NumBoolCallPromotion; + ++NumBoolToIntPromotion; + + for (const auto &V : Defs) + if (!BoolToIntMap.count(V)) + BoolToIntMap[V] = translate(V); + + // Replace the operands of the translated instructions. There were set to + // zero in the translate function. + for (auto &Pair : BoolToIntMap) { + User *First = dyn_cast(Pair.first); + User *Second = dyn_cast(Pair.second); + assert((!First || Second) && "translated from user to non-user!?"); + if (First) + for (unsigned i = 0; i < First->getNumOperands(); ++i) + Second->setOperand(i, BoolToIntMap[First->getOperand(i)]); + } + + Value *IntRetVal = BoolToIntMap[U]; + Type *Int1Ty = Type::getInt1Ty(U->getContext()); + Instruction *I = cast(U.getUser()); + Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I); + U.set(BackToBool); + + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } +}; +} + +char PPCBoolRetToInt::ID = 0; +INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", + "Convert i1 constants to i32 if they are returned", + false, false) + +FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -67,6 +67,9 @@ RegisterTargetMachine A(ThePPC32Target); RegisterTargetMachine B(ThePPC64Target); RegisterTargetMachine C(ThePPC64LETarget); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializePPCBoolRetToIntPass(PR); } /// Return the datalayout string of a subtarget. @@ -282,6 +285,8 @@ } void PPCPassConfig::addIRPasses() { + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createPPCBoolRetToIntPass()); addPass(createAtomicExpandPass(&getPPCTargetMachine())); // For the BG/Q (or if explicitly requested), add explicit data prefetch Index: test/CodeGen/PowerPC/BoolRetToIntTest.ll =================================================================== --- test/CodeGen/PowerPC/BoolRetToIntTest.ll +++ test/CodeGen/PowerPC/BoolRetToIntTest.ll @@ -0,0 +1,203 @@ +; RUN: opt -bool-ret-to-int -S -o - < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; CHECK-LABEL: notBoolRet +define signext i32 @notBoolRet() { +entry: +; CHECK: ret i32 1 + ret i32 1 +} + +; CHECK-LABEL: find +define zeroext i1 @find(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: retFalse +define zeroext i1 @retFalse() { +entry: +; CHECK: ret i1 false + ret i1 false +} + +; CHECK-LABEL: retCvtFalse +define zeroext i1 @retCvtFalse() { +entry: +; CHECK: ret i1 false + ret i1 trunc(i32 0 to i1) +} + +; CHECK-LABEL: find_cont +define void @find_cont(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) + ret void +} + +; CHECK-LABEL: find_cont_ret +define zeroext i1 @find_cont_ret(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: arg_operand +define zeroext i1 @arg_operand(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo ], [ %operand, %entry ] + ret i1 %result +} + +; CHECK-LABEL: bad_use +define zeroext i1 @bad_use(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo], [ true, %entry ] + %0 = icmp eq i1 %result, %operand + ret i1 %result +} + +; CHECK-LABEL: bad_use_closure +define zeroext i1 @bad_use_closure(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + %0 = icmp eq i1 %bar, %operand + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 [ true +; CHECK: ret i1 [[REG]] + %result = phi i1 [ true, %entry ], [ %bar, %foo] + ret i1 %result +} + +; CHECK-LABEL: arg_test +define zeroext i1 @arg_test(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + br label %cleanup + +; CHECK-LABEL: cleanup +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ %bar, %foo], [ %operand, %entry ] + ret i1 %result +} + +declare zeroext i1 @return_i1() + +; CHECK-LABEL: call_test +define zeroext i1 @call_test() { +; CHECK: [[REG:%.+]] = call i1 + %result = call i1 @return_i1() +; CHECK: ret i1 [[REG]] + ret i1 %result +} \ No newline at end of file