Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -74,6 +74,7 @@ void initializeCallGraphWrapperPassPass(PassRegistry &); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); +void initializeBoolRetToIntPass(PassRegistry &); void initializeBoundsCheckingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -65,6 +65,7 @@ (void) llvm::createTypeBasedAAWrapperPass(); (void) llvm::createScopedNoAliasAAWrapperPass(); (void) llvm::createBoundsCheckingPass(); + (void) llvm::createBoolRetToIntPass(); (void) llvm::createBreakCriticalEdgesPass(); (void) llvm::createCallGraphPrinterPass(); (void) llvm::createCallGraphViewerPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -486,6 +486,12 @@ // FunctionPass *createLoopLoadEliminationPass(); +//===----------------------------------------------------------------------===// +// +// BoolRetToInt - Convert i1 to i32 type. 
+// +Pass *createBoolRetToIntPass(); + } // End llvm namespace #endif Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -282,6 +282,8 @@ } void PPCPassConfig::addIRPasses() { + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createBoolRetToIntPass()); addPass(createAtomicExpandPass(&getPPCTargetMachine())); // For the BG/Q (or if explicitly requested), add explicit data prefetch Index: lib/Transforms/Scalar/BoolRetToInt.cpp =================================================================== --- lib/Transforms/Scalar/BoolRetToInt.cpp +++ lib/Transforms/Scalar/BoolRetToInt.cpp @@ -0,0 +1,258 @@ +//===- BoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements converting i1 values to i32 if they could be +// more profitably allocated as GPRs rather than CRs. This pass will +// become totally unnecessary if Register Bank Allocation and Global +// Instruction Selection ever go upstream. +// +// Presently, the pass converts i1 Constants, Arguments, and CallInsts +// to i32 if the transitive closure of their uses includes only +// PHINodes, CallInsts, and ReturnInsts. The rationale is that +// arguments are generally passed and returned in GPRs rather than +// CRs, so casting them to i32 at the LLVM IR level will actually save +// casts at the Machine Instruction level. +// +// It might be useful to expand this pass to add bit-wise operations +// to the list of safe transitive closure types. Also, we miss some +// opportunities when LLVM represents logical AND and OR operations +// with control flow rather than data flow. 
For example by lowering +// the expression: return (A && B && C) +// +// as: return A ? B && C : false. +// +// There's code in SimplifyCFG that could be used to turn control flow +// into data flow using SelectInsts. Selects are slow on some +// architectures (PPC), so this probably isn't good in general, but +// for the special case of i1, the Selects could be further lowered to +// bit operations that are fast everywhere. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +#define DEBUG_TYPE "bool-ret-to-int" + +STATISTIC(NumBoolRetPromotion, + "Number of times a bool feeding a RetInst was promoted to an int"); +STATISTIC(NumBoolCallPromotion, + "Number of times a bool feeding a CallInst was promoted to an int"); +STATISTIC(NumBoolToIntPromotion, + "Total number of times a bool was promoted to an int"); + +class BoolRetToInt : public FunctionPass { + + static SmallPtrSet findAllDefs(Value *V) { + SmallPtrSet Defs; + SmallVector WorkList; + WorkList.push_back(V); + Defs.insert(V); + while (!WorkList.empty()) { + Value *Curr = WorkList.back(); + WorkList.pop_back(); + if (User *CurrUser = dyn_cast(Curr)) + for (auto &Op : CurrUser->operands()) + if (Defs.insert(Op).second) + WorkList.push_back(Op); + } + return Defs; + } + + // Translate an i1 value to an equivalent i32 value: + static Value *translate(Value *V) { + Type *Int32Ty = Type::getInt32Ty(V->getContext()); + if (Constant *C = dyn_cast(V)) + return ConstantExpr::getZExt(C, Int32Ty); + + + if (PHINode *P = dyn_cast(V)) { + // Temporarily set the operands to 0. We'll fix this later in + // runOnUse. 
+ Value *Zero = Constant::getNullValue(Int32Ty); + PHINode *Q = + PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + for (unsigned i = 0; i < P->getNumOperands(); ++i) + Q->addIncoming(Zero, P->getIncomingBlock(i)); + return Q; + } + + Argument *A = dyn_cast(V); + Instruction *I = dyn_cast(V); + assert((A || I) && "Unknown value type"); + + auto InstPt = A ? + &*A->getParent()->getEntryBlock().begin() : + I->getNextNode(); + return new ZExtInst(V, Int32Ty, "", InstPt); + } + + typedef SmallPtrSet PHINodeSet; + + // A PHINode is Promotable if: + // 1. Its type is i1 AND + // 2. All of its uses are ReturnInst, CallInst, PHINode, or + // DbgInfoIntrinsic AND + // 3. All of its operands are Constant or Argument or + // CallInst or PHINode AND + // 4. All of its PHINode uses are Promotable AND + // 5. All of its PHINode operands are Promotable + static PHINodeSet getPromotablePHINodes(const Function &F) { + PHINodeSet Promotable; + + // Condition 1 + for (auto &BB : F) + for (auto &I : BB) + if (const PHINode *P = dyn_cast(&I)) + if (P->getType()->isIntegerTy(1)) + Promotable.insert(P); + + SmallVector toRemove; + for (const auto &P : Promotable) { + + // Condition 2 + for (const auto &U : P->users()) + if (!isa(U) && !isa(U) && !isa(U) && + !isa(U)) + toRemove.push_back(P); + + // Condition 3 + for (auto &Op : P->operands()) + if (!isa(Op) && !isa(Op) && + !isa(Op) && !isa(Op)) + toRemove.push_back(P); + } + + // Iterate to convergence + while (!toRemove.empty()) { + for (auto &User : toRemove) + Promotable.erase(User); + toRemove.clear(); + + for (const auto &P : Promotable) { + + // Condition 4 + for (const auto &U : P->users()) + if (const PHINode *Phi = dyn_cast(U)) + if (!Promotable.count(Phi)) + toRemove.push_back(P); + + // Condition 5 + for (auto &Op : P->operands()) + if (const PHINode *Phi = dyn_cast(Op)) + if (!Promotable.count(Phi)) + toRemove.push_back(P); + } + } + + return Promotable; + } + + typedef DenseMap B2IMap; + +public: + 
static char ID; + BoolRetToInt() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) { + + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); + B2IMap Bool2IntMap; + bool Changed = false; + for (auto &BB : F) { + for (auto &I : BB) { + if (ReturnInst *R = dyn_cast(&I)) + if (F.getReturnType()->isIntegerTy(1)) + Changed |= + runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap); + + if (CallInst *CI = dyn_cast(&I)) + for (auto &U : CI->operands()) + if (U->getType()->isIntegerTy(1)) + Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap); + } + } + + return Changed; + } + + static bool runOnUse(Use &U, + const PHINodeSet &PromotablePHINodes, + B2IMap &BoolToIntMap) { + auto Defs = findAllDefs(U); + + // If the values are all Constants or Arguments, don't bother + if (!std::any_of(Defs.begin(), Defs.end(), isa)) + return false; + + // Presently, we only know how to handle PHINode, Constant, + // Argument, and CallInst. Potentially, bitwise operations (AND, + // OR, XOR, NOT) could also be handled in the future. + for (const auto &V : Defs) + if (!isa(V) && !isa(V) && + !isa(V) && !isa(V)) + return false; + + for (const auto &V : Defs) + if (const PHINode *P = dyn_cast(V)) + if (!PromotablePHINodes.count(P)) + return false; + + if (isa(U.getUser())) ++NumBoolRetPromotion; + if (isa(U.getUser())) ++NumBoolCallPromotion; + ++NumBoolToIntPromotion; + + for (const auto &V : Defs) + if (!BoolToIntMap.count(V)) + BoolToIntMap[V] = translate(V); + + // Replace the operands of the translated instructions. They were + // set to zero in the translate function. 
+ for (auto &Pair : BoolToIntMap) { + User *First = dyn_cast(Pair.first); + User *Second = dyn_cast(Pair.second); + assert((!First || Second) && "translated from user to non-user!?"); + if (First) + for (unsigned i = 0; i < First->getNumOperands(); ++i) + Second->setOperand(i, BoolToIntMap[First->getOperand(i)]); + } + + Value *IntRetVal = BoolToIntMap[U]; + Type *Int1Ty = Type::getInt1Ty(U->getContext()); + Instruction *I = cast(U.getUser()); + Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I); + U.set(BackToBool); + + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } +}; +} + +char BoolRetToInt::ID = 0; +INITIALIZE_PASS(BoolRetToInt, "bool-ret-to-int", + "Convert i1 constants to i32 if they are returned", false, + false) + +Pass *llvm::createBoolRetToIntPass() { return new BoolRetToInt(); } Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -2,6 +2,7 @@ ADCE.cpp AlignmentFromAssumptions.cpp BDCE.cpp + BoolRetToInt.cpp ConstantHoisting.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -32,6 +32,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCELegacyPassPass(Registry); initializeBDCEPass(Registry); + initializeBoolRetToIntPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); initializeConstantHoistingPass(Registry); initializeConstantPropagationPass(Registry); Index: test/Transforms/BoolRetToInt/BoolRetToIntTest.ll =================================================================== --- test/Transforms/BoolRetToInt/BoolRetToIntTest.ll +++ test/Transforms/BoolRetToInt/BoolRetToIntTest.ll @@ -0,0 
+1,193 @@ +; RUN: opt -bool-ret-to-int -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; CHECK: i32 @notBoolRet() +define signext i32 @notBoolRet() { +entry: +; CHECK: ret i32 1 + ret i32 1 +} + +; CHECK-LABEL: find +define zeroext i1 @find(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: retFalse +define zeroext i1 @retFalse() { +entry: +; CHECK: ret i1 false + ret i1 false +} + +; CHECK-LABEL: retCvtFalse +define zeroext i1 @retCvtFalse() { +entry: +; CHECK: ret i1 false + ret i1 trunc(i32 0 to i1) +} + +; CHECK-LABEL: find_cont +define void @find_cont(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br 
i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) + ret void +} + +; CHECK-LABEL: find_cont_ret +define zeroext i1 @find_cont_ret(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = 
%for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: arg_operand +define zeroext i1 @arg_operand(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo ], [ %operand, %entry ] + ret i1 %result +} + +; CHECK-LABEL: bad_use +define zeroext i1 @bad_use(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo], [ true, %entry ] + %0 = icmp eq i1 %result, %operand + ret i1 %result +} + +; CHECK-LABEL: bad_use_closure +define zeroext i1 @bad_use_closure(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + %0 = icmp eq i1 %bar, %operand + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 [ true +; CHECK: ret i1 [[REG]] + %result = phi i1 [ true, %entry ], [ %bar, %foo] + ret i1 %result +} + +; CHECK-LABEL: arg_test +define zeroext i1 @arg_test(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + br label %cleanup + +; CHECK-LABEL: cleanup +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ %bar, %foo], [ 
%operand, %entry ] + ret i1 %result +} \ No newline at end of file