Index: flang/test/Lower/OpenMP/simd.f90 =================================================================== --- flang/test/Lower/OpenMP/simd.f90 +++ flang/test/Lower/OpenMP/simd.f90 @@ -1,6 +1,8 @@ ! Tests for 2.9.3.1 Simd ! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s +! RUN: bbc -fopenmp -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefixes="LLVMIRDialect" +! RUN: bbc -fopenmp -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | tco | FileCheck %s --check-prefix=LLVMIR !CHECK-LABEL: func @_QPsimdloop() subroutine simdloop @@ -27,7 +29,15 @@ ! CHECK: %[[UB:.*]] = fir.load %arg0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: %[[COND:.*]] = arith.cmpi sge + ! LLVMIRDialect: %[[LB_LLVMIR1:.*]] = llvm.mlir.constant(1 : i32) : i32 + ! LLVMIRDialect: %[[UB_LLVMIR1:.*]] = llvm.load %arg0 : !llvm.ptr + ! LLVMIRDialect: %[[STEP_LLVMIR1:.*]] = llvm.mlir.constant(1 : i32) : i32 + ! LLVMIRDialect: %[[COND_LLVMIR1:.*]] = llvm.icmp "sge" ! CHECK: omp.simdloop if(%[[COND:.*]]) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! LLVMIRDialect: omp.simdloop if(%[[COND_LLVMIR1:.*]]) for (%[[I:.*]]) : i32 = (%[[LB_LLVMIR1]]) to (%[[UB_LLVMIR1]]) inclusive step (%[[STEP_LLVMIR1]]) { + ! LLVMIR: "llvm.loop.parallel_accesses" + ! LLVMIR: !{!"llvm.loop.vectorize.enable", i1 true} + ! LLVMIR: !{!"llvm.loop.vectorize.enable", i1 false} do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -272,13 +272,15 @@ /// \param BodyGenCB Callback that will generate the loop body code. /// \param TripCount Number of iterations the loop body is executed. /// \param Name Base name used to derive BB and instruction names. 
+ /// \param Preheader If not null, block to use as the loop preheader. /// /// \returns An object representing the created control flow structure which /// can be used for loop-associated directives. CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, - const Twine &Name = "loop"); + const Twine &Name = "loop", + BasicBlock *Preheader = nullptr); /// Generator for the control flow structure of an OpenMP canonical loop. /// @@ -325,6 +327,8 @@ /// at the outermost loop of a loop nest. If not set, /// defaults to the preheader of the generated loop. /// \param Name Base name used to derive BB and instruction names. + /// \param CreatePreheader If true, this function creates a preheader block + /// and emits the trip count computation into it. /// /// \returns An object representing the created control flow structure which /// can be used for loop-associated directives. @@ -333,7 +337,8 @@ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP = {}, - const Twine &Name = "loop"); + const Twine &Name = "loop", + bool CreatePreheader = false); /// Collapse a loop nest into a single loop. /// @@ -603,6 +608,14 @@ /// \param Simdlen The Simdlen length to apply to the simd loop. void applySimd(CanonicalLoopInfo *Loop, ConstantInt *Simdlen); + /// Add metadata to simd-ize a loop nest. + /// + /// \param DL Debug location for instructions added while collapsing. + /// \param Loops The set of loops annotated with the simd directive. + /// \param IfCond Value of the optional if clause, or null if absent. + void applySimd(DebugLoc DL, ArrayRef Loops, + Value *IfCond = nullptr, ConstantInt *Simdlen = nullptr); + /// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered @@ -1554,13 +1567,14 @@ /// \param PostInsertBefore Where to insert BBs that execute after the body. /// \param Name Base name used to derive BB /// and instruction names. 
+ /// \param Preheader Preheader block. If it is not null, no new preheader + /// is created. /// /// \returns The CanonicalLoopInfo that represents the emitted loop. - CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, - Function *F, - BasicBlock *PreInsertBefore, - BasicBlock *PostInsertBefore, - const Twine &Name = {}); + CanonicalLoopInfo * + createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, + BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, + const Twine &Name = {}, BasicBlock *Preheader = nullptr); }; /// Class to represented the control flow structure of an OpenMP canonical loop. Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -34,6 +34,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/UnrollLoop.h" @@ -1843,14 +1844,18 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, - BasicBlock *PostInsertBefore, const Twine &Name) { + BasicBlock *PostInsertBefore, const Twine &Name, BasicBlock *Preheader) { Module *M = F->getParent(); LLVMContext &Ctx = M->getContext(); Type *IndVarTy = TripCount->getType(); + bool PreheaderCreated = false; // Create the basic block structure. 
- BasicBlock *Preheader = - BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore); + if (!Preheader) { + PreheaderCreated = true; + Preheader = BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, + PreInsertBefore); + } BasicBlock *Header = BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore); BasicBlock *Cond = @@ -1866,8 +1871,8 @@ // Use specified DebugLoc for new instructions. Builder.SetCurrentDebugLocation(DL); - - Builder.SetInsertPoint(Preheader); + if (PreheaderCreated) + Builder.SetInsertPoint(Preheader); Builder.CreateBr(Header); Builder.SetInsertPoint(Header); @@ -1907,15 +1912,20 @@ return CL; } -CanonicalLoopInfo * -OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, - LoopBodyGenCallbackTy BodyGenCB, - Value *TripCount, const Twine &Name) { +CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( + const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, + Value *TripCount, const Twine &Name, BasicBlock *Preheader) { BasicBlock *BB = Loc.IP.getBlock(); BasicBlock *NextBB = BB->getNextNode(); + CanonicalLoopInfo *CL; + if (Preheader) + CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(), + Preheader->getNextNode(), Preheader->getNextNode(), + Name, Preheader); + else + CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(), NextBB, NextBB, + Name); - CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(), - NextBB, NextBB, Name); BasicBlock *After = CL->getAfter(); // If location is not set, don't connect the loop. 
@@ -1940,7 +1950,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, - InsertPointTy ComputeIP, const Twine &Name) { + InsertPointTy ComputeIP, const Twine &Name, bool CreatePreheader) { // Consider the following difficulties (assuming 8-bit signed integers): // * Adding \p Step to the loop counter which passes \p Stop may overflow: @@ -1952,13 +1962,22 @@ auto *IndVarTy = cast(Start->getType()); assert(IndVarTy == Stop->getType() && "Stop type mismatch"); assert(IndVarTy == Step->getType() && "Step type mismatch"); - + BasicBlock *Preheader = nullptr; + if (CreatePreheader) { + Module *M = Loc.IP.getBlock()->getParent()->getParent(); + Function *F = Loc.IP.getBlock()->getParent(); + LLVMContext &Ctx = M->getContext(); + Preheader = BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, + Loc.IP.getBlock()->getNextNode()); + } LocationDescription ComputeLoc = ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc; updateToLocation(ComputeLoc); ConstantInt *Zero = ConstantInt::get(IndVarTy, 0); ConstantInt *One = ConstantInt::get(IndVarTy, 1); + if (CreatePreheader) + Builder.SetInsertPoint(Preheader); // Like Step, but always positive. Value *Incr = Step; @@ -2006,7 +2025,7 @@ BodyGenCB(Builder.saveIP(), IndVar); }; LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP(); - return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name); + return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name, Preheader); } // Returns an LLVM function to call for initializing loop bounds using OpenMP @@ -2544,10 +2563,11 @@ // Setup the IRBuilder for inserting the trip count computation. 
Builder.SetCurrentDebugLocation(DL); - if (ComputeIP.isSet()) + if (ComputeIP.isSet()) { Builder.restoreIP(ComputeIP); - else + } else { Builder.restoreIP(Outermost->getPreheaderIP()); + } // Derive the collapsed' loop trip count. // TODO: Find common/largest indvar type. @@ -2839,6 +2859,33 @@ return Result; } +/// Attach loop metadata \p Properties to the latch terminator of \p Loop. +/// Properties already attached to that terminator are kept in front. +static void addLoopMetadata(Loop *Loop, ArrayRef<Metadata *> Properties) { + + // Nothing to do if no property to attach. + if (Properties.empty()) + return; + + // Validate the latch before anything dereferences it. + BasicBlock *Latch = Loop->getLoopLatch(); + assert(Latch && "A valid CanonicalLoopInfo must have a unique latch"); + + LLVMContext &Ctx = Latch->getParent()->getContext(); + SmallVector<Metadata *> NewLoopProperties; + NewLoopProperties.push_back(nullptr); + // Operand 0 is patched to the self-reference below; keep old properties. + MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop); + if (Existing) + append_range(NewLoopProperties, drop_begin(Existing->operands(), 1)); + + append_range(NewLoopProperties, Properties); + MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties); + LoopID->replaceOperandWith(0, LoopID); + + Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); +} + /// Attach loop metadata \p Properties to the loop described by \p Loop. If the /// loop already has metadata, the loop properties are appended. 
static void addLoopMetadata(CanonicalLoopInfo *Loop, @@ -2895,6 +2942,71 @@ }); } +void OpenMPIRBuilder::applySimd(DebugLoc DL, + ArrayRef<CanonicalLoopInfo *> Loops, + Value *IfCond, ConstantInt *Simdlen) { + LLVMContext &Ctx = Builder.getContext(); + // Collapse first so that the analyses below describe the final loop nest. + Function *F = Loops.front()->getFunction(); + CanonicalLoopInfo *CollapsedLoop = collapseLoops(DL, Loops, {}); + FunctionAnalysisManager FAM; + FAM.registerPass([]() { return DominatorTreeAnalysis(); }); + FAM.registerPass([]() { return LoopAnalysis(); }); + FAM.registerPass([]() { return PassInstrumentationAnalysis(); }); + + LoopAnalysis LIA; + LoopInfo &&LI = LIA.run(*F, FAM); + DominatorTreeAnalysis DTA; + DominatorTree &&DT = DTA.run(*F, FAM); + Loop *L = LI.getLoopFor(CollapsedLoop->getHeader()); + if (IfCond) { + // Branch from the block that defines the condition; a condition that is + // not an instruction falls back to the outermost loop's preheader. + auto *SplitBefore = dyn_cast<Instruction>(IfCond); + if (!SplitBefore) { + CanonicalLoopInfo *FrontLoop = Loops.front(); + SplitBefore = FrontLoop->getPreheader()->getTerminator(); + } + BasicBlock *Head = SplitBefore->getParent(); + + Instruction *HeadOldTerm = Head->getTerminator(); + llvm::LLVMContext &C = Head->getContext(); + llvm::BasicBlock *ThenBlock = llvm::BasicBlock::Create( + C, "simd.if.then", Head->getParent(), Head->getNextNode()); + llvm::BasicBlock *ElseBlock = llvm::BasicBlock::Create( + C, "simd.if.else", Head->getParent(), Loops.front()->getExit()); + SmallVector<BasicBlock *> PostLoopBlocks; + Loop *PostLoop; + ValueToValueMapTy VMap; + assert(L && "Collapsed loop header must belong to a loop"); + // Create if condition branch. 
+ Builder.SetInsertPoint(HeadOldTerm); + Instruction *BrInstr = + Builder.CreateCondBr(IfCond, ThenBlock, /*ifFalse*/ ElseBlock); + InsertPointTy IP{BrInstr->getParent(), ++BrInstr->getIterator()}; + // Then block contains branch to omp loop which needs to be vectorized + spliceBB(IP, ThenBlock, false); + ThenBlock->replaceSuccessorsPhiUsesWith(Head, ThenBlock); + + // Else block contains branch to cloned omp loop which is not vectorized + PostLoop = cloneLoopWithPreheader(CollapsedLoop->getExit(), + CollapsedLoop->getPreheader(), L, VMap, + ".else", &LI, &DT, PostLoopBlocks); + remapInstructionsInBlocks(PostLoopBlocks, VMap); + Builder.SetInsertPoint(ElseBlock); + Builder.CreateBr(PostLoop->getLoopPreheader()); + ConstantAsMetadata *BoolConst = + ConstantAsMetadata::get(ConstantInt::getFalse(Type::getInt1Ty(Ctx))); + // Disable vectorization if the simd condition is not met + addLoopMetadata( + PostLoop, + {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + BoolConst})}); + } + // Add simd metadata to the loop + applySimd(CollapsedLoop, Simdlen); +} + void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, ConstantInt *Simdlen) { LLVMContext &Ctx = Builder.getContext(); Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -22,6 +22,7 @@ #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace mlir; @@ -912,11 +913,6 @@ SmallVector loopInfos; SmallVector bodyInsertPoints; LogicalResult bodyGenStatus = success(); - - // TODO: The code generation for if clause is not supported yet. 
- if (loop.if_expr()) - return failure(); - auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { // Make sure further conversions know about the induction variable. moduleTranslation.mapValue( @@ -935,7 +931,6 @@ convertOmpOpRegions(loop.region(), "omp.simdloop.region", builder, moduleTranslation, bodyGenStatus); }; - // Delegate actual loop construction to the OpenMP IRBuilder. // TODO: this currently assumes SimdLoop is semantically similar to SCF loop, // i.e. it has a positive step, uses signed integer semantics. Reconsider @@ -953,27 +948,37 @@ // loop will be created below. llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; - if (i != 0) { + if (i == 0) { + // Ask the OMPBuilder to create the preheader of the outermost loop. + // The trip count values of all loops are placed in that block. + loopInfos.push_back(ompBuilder->createCanonicalLoop( + loc, bodyGen, lowerBound, upperBound, step, + /*IsSigned=*/true, /*Inclusive=*/true, computeIP, "loop", + /*CreatePreheader=*/true)); + } else { + // The preheader of the outermost loop already exists at this point. loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), ompLoc.DL); computeIP = loopInfos.front()->getPreheaderIP(); + loopInfos.push_back(ompBuilder->createCanonicalLoop( + loc, bodyGen, lowerBound, upperBound, step, + /*IsSigned=*/true, /*Inclusive=*/true, computeIP, "loop", + /*CreatePreheader=*/false)); } - loopInfos.push_back(ompBuilder->createCanonicalLoop( - loc, bodyGen, lowerBound, upperBound, step, - /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); - if (failed(bodyGenStatus)) return failure(); } // Collapse loops. 
- llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *loopInfo = - ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); - - ompBuilder->applySimd(loopInfo, nullptr); - - builder.restoreIP(afterIP); + llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); + if (loop.if_expr()) { + ompBuilder->applySimd(ompLoc.DL, loopInfos, + moduleTranslation.lookupValue(loop.if_expr()), + nullptr); + } else { + ompBuilder->applySimd(ompLoc.DL, loopInfos); + } + builder.restoreIP(afterIP); return success(); }