diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -18,8 +18,10 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/Allocator.h" +#include namespace llvm { +class CanonicalLoopInfo; /// An interface to create LLVM-IR for OpenMP directives. /// @@ -96,6 +98,17 @@ function_ref; + /// Callback type for loop body code generation. + /// + /// \param CodeGenIP is the insertion point where the loop's body code must be + /// placed. This will be a dedicated BasicBlock with a + /// conditional branch from the loop condition check and + /// terminated with an unconditional branch to the loop + /// latch. + /// \param IndVar is the induction variable usable at the insertion point. + using LoopBodyGenCallbackTy = + function_ref; + /// Callback type for variable privatization (think copy & default /// constructor). /// @@ -173,6 +186,75 @@ Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable); + /// Generator for the control flow structure of an OpenMP canonical loop. + /// + /// This generator operates on the logical iteration space of the loop, i.e. + /// the caller only has to provide a loop trip count of the loop as defined by + /// base language semantics. The trip count is interpreted as an unsigned + /// integer. The induction variable passed to \p BodyGenCB will be of the same + /// type and run from 0 to \p TripCount - 1. It is up to the callback to + /// convert the logical iteration variable to the loop counter variable in the + /// loop body. + /// + /// \param Loc The insert and source location description. + /// \param BodyGenCB Callback that will generate the loop body code. + /// \param TripCount Number of iterations the loop body is executed. + /// + /// \returns An object representing the created control flow structure which + /// can be used for loop-associated directives. + CanonicalLoopInfo *CreateCanonicalLoop(const LocationDescription &Loc, + LoopBodyGenCallbackTy BodyGenCB, + Value *TripCount); + + /// Generator for the control flow structure of an OpenMP canonical loop. + /// + /// Instead of a logical iteration space, this allows specifying user-defined + /// loop counter values using increment, upper- and lower bounds. To + /// disambiguate the terminology when counting downwards, instead of lower + /// bounds we use \p Start for the loop counter value in the first body + /// iteration. + /// + /// Consider the following limitations: + /// + /// * A loop counter space over all integer values of its bit-width cannot be + /// represented. E.g using uint8_t, its loop trip count of 256 cannot be + /// stored into an 8 bit integer): + /// + /// DO I = 0, 255, 1 + /// + /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. + /// effectively counting downwards: + /// + /// for (uint8_t i = 100u; i > 0; i += 127u) + /// + /// + /// TODO: May need to add addtional parameters to represent: + /// + /// * Allow representing downcounting with unsigned integers. + /// + /// * Sign of the step and the comparison operator might disagree: + /// + /// for (int i = 0; i < 42; --i) + /// + // + /// \param Loc The insert and source location description. + /// \param BodyGenCB Callback that will generate the loop body code. + /// \param Start Value of the loop counter for the first iterations. + /// \param Stop Loop counter values past this will stop the the + /// iterations. + /// \param Step Loop counter increment after each iteration; negative + /// means counting down. \param IsSigned Whether Start, Stop + /// and Stop are signed integers. + /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop + /// counter. + /// + /// \returns An object representing the created control flow structure which + /// can be used for loop-associated directives. + CanonicalLoopInfo *CreateCanonicalLoop(const LocationDescription &Loc, + LoopBodyGenCallbackTy BodyGenCB, + Value *Start, Value *Stop, Value *Step, + bool IsSigned, bool InclusiveStop); + /// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered @@ -310,6 +392,10 @@ /// Collection of regions that need to be outlined during finalization. SmallVector OutlineInfos; + /// Collection of owned canonical loop objects that eventually need to be + /// free'd. + std::forward_list LoopInfos; + /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } @@ -529,6 +615,105 @@ Value *getOMPCriticalRegionLock(StringRef CriticalName); }; +/// Class to represented the control flow structure of an OpenMP canonical loop. +/// +/// The control-flow structure is standardized for easy consumption by +/// directives associated with loops. For instance, the worksharing-loop +/// construct may change this control flow such that each loop iteration is +/// executed on only one thread. +/// +/// The control flow can be described as follows: +/// +/// Preheader +/// | +/// /-> Header +/// | | +/// | Cond---\ +/// | | | +/// | Body | +/// | | | +/// \--Latch | +/// | +/// Exit +/// | +/// After +/// +/// Code in the header, condition block, latch and exit block must not have any +/// side-effect. +/// +/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested +/// classes. +class CanonicalLoopInfo { + friend class OpenMPIRBuilder; + +private: + /// Whether this object currently represents a loop. + bool IsValid = false; + + BasicBlock *Preheader; + BasicBlock *Header; + BasicBlock *Cond; + BasicBlock *Body; + BasicBlock *Latch; + BasicBlock *Exit; + BasicBlock *After; + + /// Delete this loop if unused. + void eraseFromParent(); + +public: + /// The preheader ensures that there is only a single edge entering the loop. + /// Code that must be execute before any loop iteration can be emitted here, + /// such as computing the loop trip count and begin lifetime markers. Code in + /// the preheader is not considered part of the canonical loop. + BasicBlock *getPreheader() const { return Preheader; } + + /// The header is the entry for each iteration. In the canonical control flow, + /// it only contains the PHINode for the induction variable. + BasicBlock *getHeader() const { return Header; } + + /// The condition block computes whether there is another loop iteration. If + /// yes, branches to the body; otherwise to the exit block. + BasicBlock *getCond() const { return Cond; } + + /// The body block is the single entry for a loop iteration and not controlled + /// by CanonicalLoopInfo. It can contain arbitrary control flow but must + /// eventually branch to the \p Latch block. + BasicBlock *getBody() const { return Body; } + + /// Reaching the latch indicates the end of the loop body code. In the + /// canonical control flow, it only contains the increment of the induction + /// variable. + BasicBlock *getLatch() const { return Latch; } + + /// Reaching the exit indicates no more iterations are being executed. + BasicBlock *getExit() const { return Exit; } + + /// The after block is intended for clean-up code such as lifetime end + /// markers. It is separate from the exit block to ensure, analogous to the + /// preheader, it having just a single entry edge and being free from PHI + /// nodes should there be multiple loop exits (such as from break + /// statements/cancellations). + BasicBlock *getAfter() const { return After; } + + /// Returns the llvm::Value containing the number of loop iterations. I must + /// be valid in the preheader and always interpreted as an unsigned integer of + /// any bit-width. + Value *getTripCount() const { return Cond->front().getOperand(1); } + + /// Returns the instruction representing the current logical induction + /// variable. Always unsigned, always starting at 0 with an increment of one. + Instruction *getIndVar() const { return &Header->front(); } + + /// Return the insertion point for user code after the loop. + OpenMPIRBuilder::InsertPointTy getAfterIP() const { + return {After, After->begin()}; + }; + + /// Consistency self-check. + void assertOK() const; +}; + } // end namespace llvm #endif // LLVM_IR_IRBUILDER_H diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -813,6 +813,171 @@ /*Conditional*/ true, /*hasFinalize*/ true); } +CanonicalLoopInfo * +OpenMPIRBuilder::CreateCanonicalLoop(const LocationDescription &Loc, + LoopBodyGenCallbackTy BodyGenCB, + Value *TripCount) { + BasicBlock *BB = Loc.IP.getBlock(); + BasicBlock *NextBB = BB->getNextNode(); + Function *F = BB->getParent(); + Type *IndVarTy = TripCount->getType(); + + // Create the basic block structure. + BasicBlock *Preheader = + BasicBlock::Create(M.getContext(), "omp_for.preheader", F, NextBB); + BasicBlock *Header = + BasicBlock::Create(M.getContext(), "omp_for.header", F, NextBB); + BasicBlock *Cond = + BasicBlock::Create(M.getContext(), "omp_for.cond", F, NextBB); + BasicBlock *Body = + BasicBlock::Create(M.getContext(), "omp_for.body", F, NextBB); + BasicBlock *Latch = + BasicBlock::Create(M.getContext(), "omp_for.inc", F, NextBB); + BasicBlock *Exit = + BasicBlock::Create(M.getContext(), "omp_for.exit", F, NextBB); + BasicBlock *After = + BasicBlock::Create(M.getContext(), "omp_for.after", F, NextBB); + + updateToLocation(Loc); + Builder.CreateBr(Preheader); + + Builder.SetInsertPoint(Preheader); + Builder.CreateBr(Header); + + Builder.SetInsertPoint(Header); + PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_for.iv"); + IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader); + Builder.CreateBr(Cond); + + Builder.SetInsertPoint(Cond); + Value *Cmp = Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_for.cmp"); + Builder.CreateCondBr(Cmp, Body, Exit); + + Builder.SetInsertPoint(Body); + Builder.CreateBr(Latch); + + Builder.SetInsertPoint(Latch); + Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1), + "omp_for.next", /*HasNUW=*/true); + Builder.CreateBr(Header); + IndVarPHI->addIncoming(Next, Latch); + + Builder.SetInsertPoint(Exit); + Builder.CreateBr(After); + + // After all control flow has been created, insert the body user code. + BodyGenCB(InsertPointTy(Body, Body->begin()), IndVarPHI); + + // Remember and return the canonical control flow. + LoopInfos.emplace_front(); + CanonicalLoopInfo *CL = &LoopInfos.front(); + + CL->Preheader = Preheader; + CL->Header = Header; + CL->Cond = Cond; + CL->Body = Body; + CL->Latch = Latch; + CL->Exit = Exit; + CL->After = After; + + CL->IsValid = true; + +#ifndef NDEBUG + CL->assertOK(); +#endif + return CL; +} + +CanonicalLoopInfo *OpenMPIRBuilder::CreateCanonicalLoop( + const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, + Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop) { + // Consider the following difficulties (assuming 8-bit signed integers): + // * Adding \p Step to the loop counter which passes \p Stop may overflow: + // DO I = 1, 100, 50 + /// * A \p Step of INT_MIN cannot not be normalized to a positive direction: + // DO I = 100, 0, -128 + + // Start, Stop and Step must be of the same integer type. + auto *IndVarTy = cast(Start->getType()); + assert(IndVarTy == Stop->getType()); + assert(IndVarTy == Step->getType()); + + updateToLocation(Loc); + + ConstantInt *Zero = ConstantInt::get(IndVarTy, 0); + ConstantInt *One = ConstantInt::get(IndVarTy, 1); + + // Like Step, but always positive. + Value *Incr = Step; + + // Distance between Start and Stop; always positive. + Value *Span; + + // Condition whether there are no iterations are executed at all, e.g. because + // UB < LB. + Value *ZeroCmp; + + if (IsSigned) { + // Ensure that increment is positive. If not, negate and invert LB and UB. + Value *IsNeg = Builder.CreateICmpSLT(Step, Zero); + Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step); + Value *LB = Builder.CreateSelect(IsNeg, Stop, Start); + Value *UB = Builder.CreateSelect(IsNeg, Start, Stop); + Span = Builder.CreateSub(UB, LB, "", false, true); + ZeroCmp = Builder.CreateICmp( + InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB); + } else { + Span = Builder.CreateSub(Stop, Start, "", true); + ZeroCmp = Builder.CreateICmp( + InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start); + } + + Value *CountIfLooping; + if (InclusiveStop) { + CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One); + } else { + // Avoid incrementing past stop since it could overflow. + Value *CountIfTwo = Builder.CreateAdd( + Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One); + Value *OneCmp = Builder.CreateICmp( + InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr); + CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo); + } + Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping); + + auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) { + Builder.restoreIP(CodeGenIP); + Value *Span = Builder.CreateMul(IV, Step); + Value *IndVar = Builder.CreateAdd(Span, Start); + BodyGenCB(Builder.saveIP(), IndVar); + }; + return CreateCanonicalLoop(Builder.saveIP(), BodyGen, TripCount); +} + +void CanonicalLoopInfo::eraseFromParent() { + assert(IsValid); + IsValid = false; + + SmallVector BBsToRemove{Header, Cond, Latch, Exit}; + SmallVector InstsToRemove; + + // Only remove preheader if not re-purposed somewhere else. + if (Preheader->getNumUses() == 0) + BBsToRemove.push_back(Preheader); + + for (BasicBlock *BB : BBsToRemove) + for (auto &I : *BB) + InstsToRemove.push_back(&I); + + for (Instruction *I : InstsToRemove) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + + for (auto BB : BBsToRemove) + BB->eraseFromParent(); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, @@ -1218,3 +1383,49 @@ Worklist.push_back(SuccBB); } } + +void CanonicalLoopInfo::assertOK() const { +#ifndef NDEBUG + if (!IsValid) + return; + + // Verify standard control-flow we use for OpenMP loops. + assert(Preheader); + assert(isa(Preheader->getTerminator())); + assert(Preheader->getSingleSuccessor() == Header); + + assert(Header); + assert(isa(Header->getTerminator())); + assert(Header->getSingleSuccessor() == Cond); + + assert(Cond); + assert(Cond->getSinglePredecessor() == Header); + assert(isa(Header->getTerminator())); + assert(size(successors(Cond)) == 2); + assert(cast(Cond->getTerminator())->getSuccessor(0) == Body); + assert(cast(Cond->getTerminator())->getSuccessor(1) == Exit); + + assert(Body); + assert(Body->getSinglePredecessor() == Cond); + + assert(Latch); + assert(isa(Latch->getTerminator())); + assert(Latch->getSingleSuccessor() == Header); + + assert(Exit); + assert(isa(Exit->getTerminator())); + assert(Exit->getSingleSuccessor() == After); + + assert(After); + assert(After->getSinglePredecessor() == Exit); + + Instruction *IndVar = getIndVar(); + assert(IndVar); + assert(isa(IndVar->getType())); + assert(cast(IndVar)->getParent() == Header); + + Value *TripCount = getTripCount(); + assert(TripCount); + assert(IndVar->getType() == TripCount->getType()); +#endif +} diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -829,6 +829,136 @@ } } +TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + IRBuilder<> Builder(BB); + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + Value *TripCount = F->getArg(0); + + unsigned NumBodiesGenerated = 0; + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { + NumBodiesGenerated += 1; + + Builder.restoreIP(CodeGenIP); + + Value *Cmp = Builder.CreateICmpEQ(LC, TripCount); + Instruction *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), + &ThenTerm, &ElseTerm); + }; + + CanonicalLoopInfo *Loop = + OMPBuilder.CreateCanonicalLoop(Loc, LoopBodyGenCB, TripCount); + + Builder.restoreIP(Loop->getAfterIP()); + ReturnInst *RetInst = Builder.CreateRetVoid(); + OMPBuilder.finalize(); + + Loop->assertOK(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + EXPECT_EQ(NumBodiesGenerated, 1U); + + // Verify control flow structure (in addition to Loop->assertOK()). + EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock()); + EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock()); + + Instruction *IndVar = Loop->getIndVar(); + EXPECT_TRUE(isa(IndVar)); + EXPECT_EQ(IndVar->getType(), TripCount->getType()); + EXPECT_EQ(IndVar->getParent(), Loop->getHeader()); + + EXPECT_EQ(Loop->getTripCount(), TripCount); + + BasicBlock *Body = Loop->getBody(); + Instruction *CmpInst = &Body->getInstList().front(); + EXPECT_TRUE(isa(CmpInst)); + EXPECT_EQ(CmpInst->getOperand(0), IndVar); + + BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor(); + EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) { + return SuccBB->getSingleSuccessor() == LatchPred; + })); + + EXPECT_EQ(&Loop->getAfter()->front(), RetInst); +} + +TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + IRBuilder<> Builder(BB); + + // Check the trip count is computed correctly. We generate the canonical loop + // but rely on the IRBuilder's constant folder to compute the final result + // since all inputs are constant. To verify overflow situations, limit the + // trip count / loop counter widths to 16 bits. + auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step, + bool IsSigned, bool InclusiveStop) -> int64_t { + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + Type *LCTy = Type::getInt16Ty(Ctx); + Value *StartVal = ConstantInt::get(LCTy, Start); + Value *StopVal = ConstantInt::get(LCTy, Stop); + Value *StepVal = ConstantInt::get(LCTy, Step); + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {}; + CanonicalLoopInfo *Loop = + OMPBuilder.CreateCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, + StepVal, IsSigned, InclusiveStop); + Loop->assertOK(); + Builder.restoreIP(Loop->getAfterIP()); + Value *TripCount = Loop->getTripCount(); + return cast(TripCount)->getValue().getZExtValue(); + }; + + ASSERT_EQ(EvalTripCount(0, 0, 1, false, false), 0); + ASSERT_EQ(EvalTripCount(0, 1, 2, false, false), 1); + ASSERT_EQ(EvalTripCount(0, 42, 1, false, false), 42); + ASSERT_EQ(EvalTripCount(0, 42, 2, false, false), 21); + ASSERT_EQ(EvalTripCount(21, 42, 1, false, false), 21); + ASSERT_EQ(EvalTripCount(0, 5, 5, false, false), 1); + ASSERT_EQ(EvalTripCount(0, 9, 5, false, false), 2); + ASSERT_EQ(EvalTripCount(0, 11, 5, false, false), 3); + ASSERT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF); + ASSERT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0); + ASSERT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1); + ASSERT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100); + ASSERT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1); + + ASSERT_EQ(EvalTripCount(0, 6, 5, false, false), 2); + ASSERT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2); + ASSERT_EQ(EvalTripCount(0, 0, 1, false, true), 1); + ASSERT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1); + ASSERT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF); + ASSERT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000); + + ASSERT_EQ(EvalTripCount(0, 0, -1, true, false), 0); + ASSERT_EQ(EvalTripCount(0, 1, -1, true, true), 0); + ASSERT_EQ(EvalTripCount(20, 5, -5, true, false), 3); + ASSERT_EQ(EvalTripCount(20, 5, -5, true, true), 4); + ASSERT_EQ(EvalTripCount(-4, -2, 2, true, false), 1); + ASSERT_EQ(EvalTripCount(-4, -3, 2, true, false), 1); + ASSERT_EQ(EvalTripCount(-4, -2, 2, true, true), 2); + + ASSERT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000); + ASSERT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001); + ASSERT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF); + ASSERT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF); + ASSERT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2); + ASSERT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF); + ASSERT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000); + ASSERT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800); + ASSERT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF); + ASSERT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1); + ASSERT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2); + + // Finalize the function and verify it. + Builder.CreateRetVoid(); + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); +} + TEST_F(OpenMPIRBuilderTest, MasterDirective) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M);