Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -603,6 +603,21 @@ /// \param Simdlen The Simdlen length to apply to the simd loop. void applySimd(CanonicalLoopInfo *Loop, ConstantInt *Simdlen); + /// Create a simd loop from a loop nest. + /// + /// This function generates LLVM IR code which corresponds to SIMD construct. + /// It collapses set of input loops into one loop and it attaches SIMD + /// specific metadata to the collapsed loop. If IfCond is given, then + /// the collapsed loop is cloned without SIMD specific metadata. Cloned + /// loop is executed only if IfCond is evaluated to false. + /// + /// \param DL Debug location for instructions added by collapsing. + /// \param Loops The set of loops annotated with pragma simd directive + /// \param IfCond The optional if clause + /// \param Simdlen The optional simdlen clause + void createSimdLoop(DebugLoc DL, ArrayRef Loops, + Value *IfCond = nullptr, ConstantInt *Simdlen = nullptr); + /// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -34,6 +34,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/UnrollLoop.h" @@ -2839,32 +2840,40 @@ return Result; } -/// Attach loop metadata \p Properties to the loop described by \p Loop. If the -/// loop already has metadata, the loop properties are appended. 
-static void addLoopMetadata(CanonicalLoopInfo *Loop, - ArrayRef Properties) { - assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo"); - +/// Attach metadata \p Properties to the basic block described by \p BB. If the +/// basic block already has metadata, the basic block properties are appended. +static void addBasicBlockMetadata(BasicBlock *BB, + ArrayRef Properties) { // Nothing to do if no property to attach. if (Properties.empty()) return; - LLVMContext &Ctx = Loop->getFunction()->getContext(); - SmallVector NewLoopProperties; - NewLoopProperties.push_back(nullptr); + LLVMContext &Ctx = BB->getContext(); + SmallVector NewProperties; + NewProperties.push_back(nullptr); - // If the loop already has metadata, prepend it to the new metadata. - BasicBlock *Latch = Loop->getLatch(); - assert(Latch && "A valid CanonicalLoopInfo must have a unique latch"); - MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop); + // If the basic block already has metadata, prepend it to the new metadata. + MDNode *Existing = BB->getTerminator()->getMetadata(LLVMContext::MD_loop); if (Existing) - append_range(NewLoopProperties, drop_begin(Existing->operands(), 1)); + append_range(NewProperties, drop_begin(Existing->operands(), 1)); - append_range(NewLoopProperties, Properties); - MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties); - LoopID->replaceOperandWith(0, LoopID); + append_range(NewProperties, Properties); + MDNode *BasicBlockID = MDNode::getDistinct(Ctx, NewProperties); + BasicBlockID->replaceOperandWith(0, BasicBlockID); - Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); + BB->getTerminator()->setMetadata(LLVMContext::MD_loop, BasicBlockID); +} + +/// Attach loop metadata \p Properties to the loop described by \p Loop. If the +/// loop already has metadata, the loop properties are appended. 
+static void addLoopMetadata(CanonicalLoopInfo *Loop, + ArrayRef Properties) { + assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo"); + + // Attach metadata to the loop's latch + BasicBlock *Latch = Loop->getLatch(); + assert(Latch && "A valid CanonicalLoopInfo must have a unique latch"); + addBasicBlockMetadata(Latch, Properties); } /// Attach llvm.access.group metadata to the memref instructions of \p Block @@ -2895,6 +2904,82 @@ }); } +void OpenMPIRBuilder::createSimdLoop(DebugLoc DL, + ArrayRef Loops, + Value *IfCond, ConstantInt *Simdlen) { + LLVMContext &Ctx = Builder.getContext(); + CanonicalLoopInfo *CollapsedLoop = collapseLoops(DL, Loops, {}); + if (IfCond) { + Function *F = CollapsedLoop->getFunction(); + + // Define where if branch should be inserted + Instruction *SplitBefore; + if (Instruction::classof(IfCond)) { + SplitBefore = dyn_cast(IfCond); + } else { + SplitBefore = CollapsedLoop->getPreheader()->getTerminator(); + } + FunctionAnalysisManager FAM; + FAM.registerPass([]() { return DominatorTreeAnalysis(); }); + FAM.registerPass([]() { return LoopAnalysis(); }); + FAM.registerPass([]() { return PassInstrumentationAnalysis(); }); + + // Get the loop which needs to be cloned + LoopAnalysis LIA; + LoopInfo &&LI = LIA.run(*F, FAM); + Loop *L = LI.getLoopFor(CollapsedLoop->getHeader()); + + // Create additional blocks for the if statement + BasicBlock *Head = SplitBefore->getParent(); + Instruction *HeadOldTerm = Head->getTerminator(); + llvm::LLVMContext &C = Head->getContext(); + llvm::BasicBlock *ThenBlock = llvm::BasicBlock::Create( + C, "simd.if.then", Head->getParent(), Head->getNextNode()); + // Loops may have been invalidated by collapseLoops; use CollapsedLoop. + llvm::BasicBlock *ElseBlock = llvm::BasicBlock::Create( + C, "simd.if.else", Head->getParent(), CollapsedLoop->getExit()); + ValueToValueMapTy VMap; + + // Create if condition branch. 
+ Builder.SetInsertPoint(HeadOldTerm); + Instruction *BrInstr = + Builder.CreateCondBr(IfCond, ThenBlock, /*ifFalse*/ ElseBlock); + InsertPointTy IP{BrInstr->getParent(), ++BrInstr->getIterator()}; + // Then block contains branch to omp loop which needs to be vectorized + spliceBB(IP, ThenBlock, false); + ThenBlock->replaceSuccessorsPhiUsesWith(Head, ThenBlock); + + Builder.SetInsertPoint(ElseBlock); + + // Clone loop for the else branch + SmallVector NewBlocks; + BasicBlock *NewLatchBlock = nullptr; + + VMap[CollapsedLoop->getPreheader()] = ElseBlock; + for (BasicBlock *Block : L->getBlocks()) { + BasicBlock *NewBB = CloneBasicBlock(Block, VMap, "", F); + NewBB->moveBefore(CollapsedLoop->getExit()); + VMap[Block] = NewBB; + NewBlocks.push_back(NewBB); + if (Block == CollapsedLoop->getLatch()) + NewLatchBlock = NewBB; + } + remapInstructionsInBlocks(NewBlocks, VMap); + Builder.CreateBr(NewBlocks.front()); + + // Add metadata to the cloned loop which disables vectorization + ConstantAsMetadata *BoolConst = + ConstantAsMetadata::get(ConstantInt::getFalse(Type::getInt1Ty(Ctx))); + addBasicBlockMetadata( + NewLatchBlock, + {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + BoolConst})}); + } + Builder.SetInsertPoint(CollapsedLoop->getAfter()); + // Add simd metadata to the loop + applySimd(CollapsedLoop, Simdlen); +} + void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, ConstantInt *Simdlen) { LLVMContext &Ctx = Builder.getContext(); Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1828,6 +1828,91 @@ })); } +TEST_F(OpenMPIRBuilderTest, CreateSimdLoop) { + OpenMPIRBuilder OMPBuilder(*M); + SmallVector LoopInfos; + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); + LoopInfos.push_back(CLI); + + // Simd-ize the loop without if condition + 
OMPBuilder.createSimdLoop(DL, LoopInfos, nullptr, + ConstantInt::get(Type::getInt32Ty(Ctx), 3)); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + LoopInfo &LI = FAM.getResult(*F); + + const std::vector &TopLvl = LI.getTopLevelLoops(); + EXPECT_EQ(TopLvl.size(), 1u); + + Loop *L = TopLvl.front(); + EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); + + // Check for llvm.access.group metadata attached to the printf + // function in the loop body. + BasicBlock *LoopBody = CLI->getBody(); + EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { + return I.getMetadata("llvm.access.group") != nullptr; + })); +} + +TEST_F(OpenMPIRBuilderTest, CreateSimdLoopIf) { + OpenMPIRBuilder OMPBuilder(*M); + IRBuilder<> Builder(BB); + AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); + AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); + + // Generation of if condition + Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1); + Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2); + LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); + LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2); + + Value *IfCmp = Builder.CreateICmpNE(Load1, Load2); + + SmallVector LoopInfos; + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); + LoopInfos.push_back(CLI); + + // Simd-ize the loop with if condition + OMPBuilder.createSimdLoop(DL, LoopInfos, IfCmp, + ConstantInt::get(Type::getInt32Ty(Ctx), 3)); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + LoopInfo &LI = FAM.getResult(*F); + + // Check if 
there are two loops (one with enabled vectorization) + const std::vector &TopLvl = LI.getTopLevelLoops(); + EXPECT_EQ(TopLvl.size(), 2u); + + Loop *L = TopLvl[0]; + EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); + + // The second loop should have disabled vectorization + L = TopLvl[1]; + EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + // Check for llvm.access.group metadata attached to the printf + // function in the loop body. + BasicBlock *LoopBody = CLI->getBody(); + EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { + return I.getMetadata("llvm.access.group") != nullptr; + })); +} + TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) { OpenMPIRBuilder OMPBuilder(*M); Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -912,11 +912,6 @@ SmallVector loopInfos; SmallVector bodyInsertPoints; LogicalResult bodyGenStatus = success(); - - // TODO: The code generation for if clause is not supported yet. - if (loop.if_expr()) - return failure(); - auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { // Make sure further conversions know about the induction variable. moduleTranslation.mapValue( @@ -966,14 +961,15 @@ return failure(); } - // Collapse loops. 
- llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *loopInfo = - ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); - - ompBuilder->applySimd(loopInfo, nullptr); - - builder.restoreIP(afterIP); + llvm::IRBuilderBase::InsertPoint AfterIP = loopInfos.front()->getAfterIP(); + if (loop.if_expr()) { + ompBuilder->createSimdLoop(ompLoc.DL, loopInfos, + moduleTranslation.lookupValue(loop.if_expr()), + nullptr); + } else { + ompBuilder->createSimdLoop(ompLoc.DL, loopInfos); + } + builder.restoreIP(AfterIP); return success(); } Index: mlir/test/Target/LLVMIR/openmp-llvm.mlir =================================================================== --- mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -728,6 +728,34 @@ // ----- +// CHECK-LABEL: @simdloop_if +llvm.func @simdloop_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr {fir.bindc_name = "threshold"}) { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {adapt.valuebyref, in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>} : (i64) -> !llvm.ptr + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFtest_simdEi"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(0 : i32) : i32 + %5 = llvm.load %arg0 : !llvm.ptr + %6 = llvm.mlir.constant(1 : i32) : i32 + %7 = llvm.load %arg0 : !llvm.ptr + %8 = llvm.load %arg1 : !llvm.ptr + %9 = llvm.icmp "sge" %7, %8 : i32 + omp.simdloop if(%9) for (%arg2) : i32 = (%4) to (%5) inclusive step (%6) { + // The form of the emitted IR is controlled by OpenMPIRBuilder and + // tested there. Just check that the right metadata is added. 
+ // CHECK: llvm.access.group + llvm.store %arg2, %1 : !llvm.ptr + omp.yield + } + llvm.return +} +// Be sure that llvm.loop.vectorize.enable metadata appears twice +// CHECK: llvm.loop.parallel_accesses +// CHECK-NEXT: llvm.loop.vectorize.enable +// CHECK: llvm.loop.vectorize.enable + +// ----- + llvm.func @body(i64) llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {