Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -74,6 +74,7 @@ void initializeCallGraphWrapperPassPass(PassRegistry &); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); +void initializeBoolRetToIntPass(PassRegistry &); void initializeBoundsCheckingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -65,6 +65,7 @@ (void) llvm::createTypeBasedAAWrapperPass(); (void) llvm::createScopedNoAliasAAWrapperPass(); (void) llvm::createBoundsCheckingPass(); + (void) llvm::createBoolRetToIntPass(); (void) llvm::createBreakCriticalEdgesPass(); (void) llvm::createCallGraphPrinterPass(); (void) llvm::createCallGraphViewerPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -486,6 +486,12 @@ // FunctionPass *createLoopLoadEliminationPass(); +//===----------------------------------------------------------------------===// +// +// BoolRetToInt - Widen returned or call-argument i1 constants/PHIs to i32. 
+//
+Pass *createBoolRetToIntPass();
+
 } // End llvm namespace
 
 #endif
Index: lib/Target/PowerPC/PPCTargetMachine.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetMachine.cpp
+++ lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -282,6 +282,8 @@
 }
 
 void PPCPassConfig::addIRPasses() {
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createBoolRetToIntPass());
   addPass(createAtomicExpandPass(&getPPCTargetMachine()));
 
   // For the BG/Q (or if explicitly requested), add explicit data prefetch
Index: lib/Transforms/Scalar/BoolRetToInt.cpp
===================================================================
--- lib/Transforms/Scalar/BoolRetToInt.cpp
+++ lib/Transforms/Scalar/BoolRetToInt.cpp
@@ -0,0 +1,247 @@
+//===- BoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting i1 values to i32 if they are returned
+// or passed to a call. On some archs (PPC), i1 are allocated to CRs but
+// returned in GPRs. Moving values from CRs to GPRs is relatively
+// expensive. Converting to i32 keeps them in GPRs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+/// Function pass that widens i1 values to i32 when they feed a return or a
+/// call argument and are computed purely from constants and PHI nodes. The
+/// widened value is truncated back to i1 immediately before the use.
+class BoolRetToInt : public FunctionPass {
+
+  /// Collect \p V and everything it transitively depends on through
+  /// instruction operands.
+  ///
+  /// Constants are recorded but not looked through: the operands of a
+  /// constant expression need not be i1, and translate() widens a constant
+  /// as a whole rather than operand by operand.
+  static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
+    SmallPtrSet<Value *, 8> Defs;
+    SmallVector<Value *, 8> WorkList;
+    WorkList.push_back(V);
+    Defs.insert(V);
+    while (!WorkList.empty()) {
+      Value *Curr = WorkList.pop_back_val();
+      if (isa<Constant>(Curr))
+        continue;
+      if (User *CurrUser = dyn_cast<User>(Curr))
+        for (auto &Op : CurrUser->operands())
+          if (Defs.insert(Op).second)
+            WorkList.push_back(Op);
+    }
+    return Defs;
+  }
+
+  /// Build the i32 counterpart of \p V, which must be a Constant or a PHINode.
+  ///
+  /// A constant is zero-extended. A PHI node gets a new i32 PHI inserted right
+  /// before it with one zero placeholder per incoming block; the caller is
+  /// responsible for filling in the real incoming values.
+  static Value *translate(Value *V) {
+    Type *Int32Ty = Type::getInt32Ty(V->getContext());
+    if (Constant *C = dyn_cast<Constant>(V))
+      return ConstantExpr::getZExt(C, Int32Ty);
+
+    if (PHINode *P = dyn_cast<PHINode>(V)) {
+      Value *Zero = Constant::getNullValue(Int32Ty);
+      PHINode *Q =
+          PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+      for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; ++I)
+        Q->addIncoming(Zero, P->getIncomingBlock(I));
+      return Q;
+    }
+
+    llvm_unreachable("Unexpected value");
+  }
+
+  typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
+
+  /// Return true if every user of \p P is a ReturnInst, CallInst, PHINode or
+  /// DbgInfoIntrinsic and every operand of \p P is a Constant or a PHINode.
+  static bool hasOnlyPromotableNeighbors(const PHINode *P) {
+    for (const auto &U : P->users())
+      if (!isa<ReturnInst>(U) && !isa<CallInst>(U) && !isa<PHINode>(U) &&
+          !isa<DbgInfoIntrinsic>(U))
+        return false;
+
+    for (auto &Op : P->operands())
+      if (!isa<Constant>(Op) && !isa<PHINode>(Op))
+        return false;
+
+    return true;
+  }
+
+  /// Return true if every PHINode that uses \p P or is an operand of \p P is
+  /// a member of \p Promotable.
+  static bool hasOnlyPromotablePHIs(const PHINode *P,
+                                    const PHINodeSet &Promotable) {
+    for (const auto &U : P->users())
+      if (const PHINode *Phi = dyn_cast<PHINode>(U))
+        if (!Promotable.count(Phi))
+          return false;
+
+    for (auto &Op : P->operands())
+      if (const PHINode *Phi = dyn_cast<PHINode>(Op))
+        if (!Promotable.count(Phi))
+          return false;
+
+    return true;
+  }
+
+  // A PHINode is Promotable if:
+  // 1. Its type is i1 AND
+  // 2. All of its uses are ReturnInst, CallInst, PHINode, or DbgInfoIntrinsic
+  // AND
+  // 3. All of its operands are Constant or PHINode AND
+  // 4. All of its PHINode uses are Promotable AND
+  // 5. All of its PHINode operands are Promotable
+  static PHINodeSet getPromotablePHINodes(const Function &F) {
+    PHINodeSet Promotable;
+
+    // Condition 1
+    for (auto &BB : F)
+      for (auto &I : BB)
+        if (const PHINode *P = dyn_cast<PHINode>(&I))
+          if (P->getType()->isIntegerTy(1))
+            Promotable.insert(P);
+
+    // Conditions 2 and 3
+    SmallVector<const PHINode *, 8> ToRemove;
+    for (const PHINode *P : Promotable)
+      if (!hasOnlyPromotableNeighbors(P))
+        ToRemove.push_back(P);
+
+    // Conditions 4 and 5: removing a node can disqualify its PHI neighbors,
+    // so iterate to convergence.
+    while (!ToRemove.empty()) {
+      for (const PHINode *P : ToRemove)
+        Promotable.erase(P);
+      ToRemove.clear();
+
+      for (const PHINode *P : Promotable)
+        if (!hasOnlyPromotablePHIs(P, Promotable))
+          ToRemove.push_back(P);
+    }
+
+    return Promotable;
+  }
+
+public:
+  static char ID;
+  BoolRetToInt() : FunctionPass(ID) {}
+
+  /// Widen every promotable i1 value that is returned from \p F or passed to
+  /// a call in \p F. Returns true if the function was modified.
+  bool runOnFunction(Function &F) override {
+    PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
+    // runOnUse only rewrites values that involve a promotable PHI node.
+    if (PromotablePHINodes.empty())
+      return false;
+
+    bool Changed = false;
+    for (auto &BB : F) {
+      for (auto &I : BB) {
+        if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
+          if (F.getReturnType()->isIntegerTy(1))
+            Changed |= runOnUse(R->getOperandUse(0), PromotablePHINodes);
+
+        if (CallInst *CI = dyn_cast<CallInst>(&I))
+          for (auto &U : CI->operands())
+            if (U->getType()->isIntegerTy(1))
+              Changed |= runOnUse(U, PromotablePHINodes);
+      }
+    }
+
+    return Changed;
+  }
+
+  /// Try to replace the i1 value used by \p U with an i32 computation followed
+  /// by a trunc placed before the using instruction.
+  ///
+  /// Returns false without touching the IR when the value is made only of
+  /// constants, or when it depends on anything other than constants and
+  /// PHI nodes from \p PromotablePHINodes.
+  static bool runOnUse(Use &U, PHINodeSet &PromotablePHINodes) {
+    auto Defs = findAllDefs(U);
+
+    // Nothing to gain from widening a value that is entirely constant.
+    if (std::all_of(Defs.begin(), Defs.end(),
+                    [](Value *V) { return isa<Constant>(V); }))
+      return false;
+
+    for (Value *V : Defs) {
+      if (isa<Constant>(V))
+        continue;
+      const PHINode *P = dyn_cast<PHINode>(V);
+      if (!P || !PromotablePHINodes.count(P))
+        return false;
+    }
+
+    DenseMap<Value *, Value *> BoolToIntMap;
+    for (Value *V : Defs)
+      BoolToIntMap[V] = translate(V);
+
+    // Replace the placeholder incoming values of every new PHI node. Only PHI
+    // nodes are rewired: constants are uniqued and must never be mutated.
+    for (auto &Pair : BoolToIntMap) {
+      PHINode *OldPhi = dyn_cast<PHINode>(Pair.first);
+      if (!OldPhi)
+        continue;
+      PHINode *NewPhi = cast<PHINode>(Pair.second);
+      for (unsigned I = 0, E = OldPhi->getNumIncomingValues(); I != E; ++I) {
+        // lookup() never inserts, so the map is not rehashed mid-iteration.
+        Value *Incoming = BoolToIntMap.lookup(OldPhi->getIncomingValue(I));
+        assert(Incoming && "incoming value of a def was not translated");
+        NewPhi->setIncomingValue(I, Incoming);
+      }
+    }
+
+    Value *IntRetVal = BoolToIntMap.lookup(U.get());
+    Type *Int1Ty = Type::getInt1Ty(U->getContext());
+    Instruction *I = cast<Instruction>(U.getUser());
+    Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I);
+    U.set(BackToBool);
+
+    return true;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    FunctionPass::getAnalysisUsage(AU);
+  }
+};
+}
+
+char BoolRetToInt::ID = 0;
+INITIALIZE_PASS(BoolRetToInt, "bool-ret-to-int",
+                "Convert i1 constants to i32 if they are returned", false,
+                false)
+
+Pass *llvm::createBoolRetToIntPass() { return new BoolRetToInt(); }
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -2,6 +2,7 @@
   ADCE.cpp
   AlignmentFromAssumptions.cpp
   BDCE.cpp
+  BoolRetToInt.cpp
   ConstantHoisting.cpp
   ConstantProp.cpp
   CorrelatedValuePropagation.cpp
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -32,6 +32,7 @@
 void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeADCELegacyPassPass(Registry);
   initializeBDCEPass(Registry);
+  initializeBoolRetToIntPass(Registry);
   initializeAlignmentFromAssumptionsPass(Registry);
   initializeConstantHoistingPass(Registry);
   initializeConstantPropagationPass(Registry);
Index: test/Transforms/BoolRetToInt/BoolRetToIntTest.ll
===================================================================
--- test/Transforms/BoolRetToInt/BoolRetToIntTest.ll
+++ test/Transforms/BoolRetToInt/BoolRetToIntTest.ll
@@ -0,0 +1,204 @@
+; RUN: opt -bool-ret-to-int -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; A function that does not return i1 must be left untouched.
+; CHECK: i32 @notBoolRet()
+define signext i32 @notBoolRet() {
+entry:
+; CHECK: ret i32 1
+  ret i32 1
+}
+
+; Returned i1 PHIs fed only by constants/PHIs are rebuilt as i32 PHIs + trunc.
+; CHECK-LABEL: find
+define zeroext i1 @find(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp) {
+entry:
+  %cmp.4 = icmp eq i8** %begin, %end
+  br i1 %cmp.4, label %cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  %cmp = icmp eq i8** %incdec.ptr, %end
+  br i1 %cmp, label %cleanup.loopexit, label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.cond
+  %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
+  %0 = load i8*, i8** %curr.05, align 8
+  %call = tail call zeroext i1 %hasProp(i8* %0)
+  %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
+  br i1 %call, label %cleanup.loopexit, label %for.cond
+
+cleanup.loopexit:                                 ; preds = %for.body, %for.cond
+; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+  %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
+  br label %cleanup
+
+cleanup:                                          ; preds = %cleanup.loopexit, %entry
+; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+  %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: ret i1 [[REG]]
+  ret i1 %cleanup.dest.slot.0
+}
+
+; A plain constant return is not rewritten.
+; CHECK-LABEL: retFalse
+define zeroext i1 @retFalse() {
+entry:
+; CHECK: ret i1 false
+  ret i1 false
+}
+
+; A constant-expression return is not rewritten either.
+; CHECK-LABEL: retCvtFalse
+define zeroext i1 @retCvtFalse() {
+entry:
+; CHECK: ret i1 false
+  ret i1 trunc(i32 0 to i1)
+}
+
+; The same promotion applies when the i1 PHI is passed as a call argument.
+; CHECK-LABEL: find_cont
+define void @find_cont(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) {
+entry:
+  %cmp.4 = icmp eq i8** %begin, %end
+  br i1 %cmp.4, label %cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  %cmp = icmp eq i8** %incdec.ptr, %end
+  br i1 %cmp, label %cleanup.loopexit, label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.cond
+  %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
+  %0 = load i8*, i8** %curr.05, align 8
+  %call = tail call zeroext i1 %hasProp(i8* %0)
+  %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
+  br i1 %call, label %cleanup.loopexit, label %for.cond
+
+cleanup.loopexit:                                 ; preds = %for.body, %for.cond
+; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+  %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
+  br label %cleanup
+
+cleanup:                                          ; preds = %cleanup.loopexit, %entry
+; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+  %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: call void %cont(i1 [[REG]]
+  tail call void %cont(i1 %cleanup.dest.slot.0)
+  ret void
+}
+
+; A PHI used by both a call and a return gets a trunc for each use.
+; CHECK-LABEL: find_cont_ret
+define zeroext i1 @find_cont_ret(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) {
+entry:
+  %cmp.4 = icmp eq i8** %begin, %end
+  br i1 %cmp.4, label %cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  %cmp = icmp eq i8** %incdec.ptr, %end
+  br i1 %cmp, label %cleanup.loopexit, label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.cond
+  %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
+  %0 = load i8*, i8** %curr.05, align 8
+  %call = tail call zeroext i1 %hasProp(i8* %0)
+  %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
+  br i1 %call, label %cleanup.loopexit, label %for.cond
+
+cleanup.loopexit:                                 ; preds = %for.body, %for.cond
+; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+  %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
+  br label %cleanup
+
+cleanup:                                          ; preds = %cleanup.loopexit, %entry
+; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+  %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: call void %cont(i1 [[REG]]
+  tail call void %cont(i1 %cleanup.dest.slot.0)
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: ret i1 [[REG]]
+  ret i1 %cleanup.dest.slot.0
+}
+
+; Not promoted: the PHI has an incoming value that is neither constant nor PHI.
+; CHECK-LABEL: bad_operand
+define zeroext i1 @bad_operand(i1 %operand) {
+entry:
+  br i1 %operand, label %foo, label %cleanup
+
+foo:
+  br label %cleanup
+
+cleanup:
+; CHECK: [[REG:%.+]] = phi i1
+; CHECK: ret i1 [[REG]]
+  %result = phi i1 [ false, %foo ], [ %operand, %entry ]
+  ret i1 %result
+}
+
+; Not promoted: the PHI has a user (icmp) other than ret, call or PHI.
+; CHECK-LABEL: bad_use
+define zeroext i1 @bad_use(i1 %operand) {
+entry:
+  br i1 %operand, label %foo, label %cleanup
+
+foo:
+  br label %cleanup
+
+cleanup:
+; CHECK: [[REG:%.+]] = phi i1
+; CHECK: ret i1 [[REG]]
+  %result = phi i1 [ false, %foo], [ true, %entry ]
+  %0 = icmp eq i1 %result, %operand
+  ret i1 %result
+}
+
+; Not promoted: the incoming PHI %bar is itself unpromotable (icmp user).
+; CHECK-LABEL: bad_use_closure
+define zeroext i1 @bad_use_closure(i1 %operand) {
+entry:
+  br i1 %operand, label %foo, label %cleanup
+
+foo:
+  %bar = phi i1 [ false, %entry ]
+  %0 = icmp eq i1 %bar, %operand
+  br label %cleanup
+
+cleanup:
+; CHECK: [[REG:%.+]] = phi i1 [ true
+; CHECK: ret i1 [[REG]]
+  %result = phi i1 [ true, %entry ], [ %bar, %foo]
+  ret i1 %result
+}
+
+; Not promoted: %bar feeds the unpromotable PHI %baz, which taints %result too.
+; CHECK-LABEL: bad_user_closure
+define zeroext i1 @bad_user_closure(i1 %operand) {
+entry:
+  br i1 %operand, label %foo, label %cleanup
+
+foo:
+  %bar = phi i1 [ false, %entry ]
+  br label %cleanup
+
+; CHECK-LABEL: cleanup
+cleanup:
+; CHECK: [[REG:%.+]] = phi i1 [ true
+; CHECK: ret i1 [[REG]]
+  %result = phi i1 [ true, %entry ], [ %bar, %foo]
+  %baz = phi i1 [ %bar, %foo], [ %operand, %entry ]
+  ret i1 %result
+}