Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2582,7 +2582,70 @@ } } +static bool isSupportedByOpenMPIRBuilder(const OMPExecutableDirective &S) { + // Check for unsupported clauses + for (OMPClause *C : S.clauses()) + if (isa(C) || isa(C) || + isa(C) || isa(C) || + isa(C) || isa(C) || + isa(C) || isa(C) || + isa(C) || isa(C) || + isa(C)) + return false; + // Check if we have a statement with the ordered directive. + // Visit the statement hierarchy to find a compound statement + // with a ordered directive in it. + if (const auto *CanonLoop = dyn_cast(S.getRawStmt())) { + if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { + for (const Stmt *SubStmt : SyntacticalLoop->children()) { + if (!SubStmt) + continue; + if (const CompoundStmt *CS = dyn_cast(SubStmt)) { + for (const Stmt *CSSubStmt : CS->children()) { + if (!CSSubStmt) + continue; + if (isa(CSSubStmt)) { + return false; + } + } + } + } + } + } + return true; +} + void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + bool UseOMPIRBuilder = + CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); + if (UseOMPIRBuilder) { + auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF, + PrePostActionTy &) { + // Use the OpenMPIRBuilder if enabled. + if (UseOMPIRBuilder) { + // Emit the associated statement and get its loop representation. + llvm::DebugLoc DL = SourceLocToDebugLoc(S.getBeginLoc()); + const Stmt *Inner = S.getRawStmt(); + llvm::CanonicalLoopInfo *CLI = + EmitOMPCollapsedCanonicalLoopNest(Inner, 1); + + llvm::OpenMPIRBuilder &OMPBuilder = + CGM.getOpenMPRuntime().getOMPBuilder(); + // Add SIMD specific metadata + OMPBuilder.applySimd(DL, CLI); + return; + } + }; + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, + CodeGenIRBuilder); + } + return; + } + ParentLoopDirectiveForScanRegion ScanRegion(*this, S); OMPFirstScanLoop = true; auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Index: clang/test/OpenMP/irbuilder_simd.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/irbuilder_simd.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECKTWOLOOPS +// expected-no-diagnostics + +struct S { + int a, b; +}; + +struct P { + int a, b; +}; + +void simple(float *a, float *b, int *c) { + S s, *p; + P pp; +#pragma omp simd + for (int i = 3; i < 32; i += 5) { + // llvm.access.group test + // CHECK: %[[A_ADDR:.+]] = alloca float*, align 8 + // CHECK: %[[B_ADDR:.+]] = alloca float*, align 8 + // CHECK: %[[S:.+]] = alloca %struct.S, align 4 + // CHECK: %[[P:.+]] = alloca %struct.S*, align 8 + // CHECK: %[[I:.+]] = alloca i32, align 4 + // CHECK: %[[TMP3:.+]] = load float*, float** %[[B_ADDR:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4:.+]] to i64 + // CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3:.+]], i64 %[[IDXPROM:.+]] + // CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[A2:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[S:.+]], i32 0, i32 0 + // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[A2:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[CONV:.+]] = sitofp i32 %[[TMP6:.+]] to float + // CHECK-NEXT: %[[ADD:.+]] = fadd float %[[TMP5:.+]], %[[CONV:.+]] + // CHECK-NEXT: %[[TMP7:.+]] = load %struct.S*, %struct.S** %[[P:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[A3:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[TMP7:.+]], i32 0, i32 0 + // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[A3:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[CONV4:.+]] = sitofp i32 %[[TMP8:.+]] to float + // CHECK-NEXT: %[[ADD5:.+]] = fadd float %[[ADD:.+]], %[[CONV4:.+]] + // CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[A_ADDR:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[IDXPROM6:.+]] = sext i32 %[[TMP10:.+]] to i64 + // CHECK-NEXT: %[[ARRAYIDX7:.+]] = getelementptr inbounds float, float* %[[TMP9:.+]], i64 %[[IDXPROM6:.+]] + // CHECK-NEXT: store float %[[ADD5:.+]], float* %[[ARRAYIDX7:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // llvm.loop test + // CHECK: %[[OMP_LOOPDOTNEXT:.+]] = add nuw i32 %[[OMP_LOOPDOTIV:.+]], 1 + // CHECK-NEXT: br label %omp_loop.header, !llvm.loop !5 + a[i] = b[i] + s.a + p->a; + } + +#pragma omp simd + for (int j = 3; j < 32; j += 5) { + // test if unique access groups were used for a second loop + // CHECKTWOLOOPS: %[[A22:.+]] = getelementptr inbounds %struct.P, %struct.P* %[[PP:.+]], i32 0, i32 0 + // CHECKTWOLOOPS-NEXT: %[[TMP14:.+]] = load i32, i32* %[[A22:.+]], align 4, !llvm.access.group ![[META8:[0-9]+]] + // CHECKTWOLOOPS-NEXT: %[[TMP15:.+]] = load i32*, i32** %[[C_ADDR:.+]], align 8, !llvm.access.group ![[META8:[0-9]+]] + // CHECKTWOLOOPS-NEXT: %[[TMP16:.+]] = load i32, i32* %[[J:.+]], align 4, !llvm.access.group ![[META8:[0-9]+]] + // CHECKTWOLOOPS-NEXT: %[[IDXPROM23:.+]] = sext i32 %[[TMP16:.+]] to i64 + // CHECKTWOLOOPS-NEXT: %[[ARRAYIDX24:.+]] = getelementptr inbounds i32, i32* %[[TMP15:.+]], i64 %[[IDXPROM23:.+]] + // CHECKTWOLOOPS-NEXT: store i32 %[[TMP14:.+]], i32* %[[ARRAYIDX24:.+]], align 4, !llvm.access.group ![[META8:[0-9]+]] + // check llvm.loop metadata + // CHECKTWOLOOPS: %[[OMP_LOOPDOTNEXT:.+]] = add nuw i32 %[[OMP_LOOPDOTIV:.+]], 1 + // CHECKTWOLOOPS-NEXT: br label %[[OMP_LLOP_BODY:.*]], !llvm.loop ![[META10:[0-9]+]] + c[j] = pp.a; + } +} + +// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK-NEXT: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} +// CHECK-NEXT: ![[META2:[0-9]+]] = +// CHECK-NEXT: ![[META3:[0-9]+]] = !{![[META4:[0-9]+]]} +// CHECK-NEXT: ![[META4:[0-9]+]] = distinct !{!"llvm.access.group"} +// CHECK-NEXT: ![[META5:[0-9]+]] = distinct !{![[META5:[0-9]+]], ![[META6:[0-9]+]], ![[META7:[0-9]+]]} +// CHECK-NEXT: ![[META6:[0-9]+]] = !{!"llvm.loop.parallel_accesses"} +// CHECK-NEXT: ![[META7:[0-9]+]] = !{!"llvm.loop.vectorize.enable"} +// CHECK-NEXT: ![[META8:[0-9]+]] = !{![[META9:[0-9]+]]} +// CHECK-NEXT: ![[META9:[0-9]+]] = distinct !{!"llvm.access.group"} +// CHECK-NEXT: ![[META10:[0-9]+]] = distinct !{![[META10:[0-9]+]], ![[META6:[0-9]+]], ![[META7:[0-9]+]]} Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -517,6 +517,12 @@ void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI); + /// Add metadata to simd-ize a loop. + /// + /// \param DL Debug location for instructions added by unrolling. + /// \param Loop The loop to simd-ize. + void applySimd(DebugLoc DL, CanonicalLoopInfo *Loop); + /// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/AssumptionCache.h" @@ -2116,6 +2117,19 @@ Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); } +/// Attach metadata access.group to the load and store instructions of \p block +static void addSimdMetadata(BasicBlock *Block, + ArrayRef Properties) { + for (auto &I : *Block) { + if (I.mayReadFromMemory() || I.mayWriteToMemory()) { + Instruction *instr = dyn_cast(&I); + LLVMContext &C = instr->getContext(); + MDNode *LoopID = MDNode::get(C, Properties); + instr->setMetadata("llvm.access.group", LoopID); + } + } +} + void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) { LLVMContext &Ctx = Builder.getContext(); addLoopMetadata( @@ -2131,6 +2145,52 @@ }); } +void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *Loop) { + LLVMContext &Ctx = Builder.getContext(); + addLoopMetadata( + Loop, + {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.parallel_accesses")), + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.vectorize.enable"))}); + + // Find the set of basic blocks reachable from the body unto the + // exit block. May have to enhance this collection for nested loops. + BasicBlock *body = Loop->getBody(); + BasicBlock *exit = Loop->getExit(); + + FunctionAnalysisManager FAM; + FAM.registerPass([]() { return DominatorTreeAnalysis(); }); + DominatorTreeAnalysis DTA; + DominatorTree &&DT = DTA.run(*(Loop->getBody()->getParent()), FAM); + + llvm::SmallSet reachable; + llvm::SmallVector worklist; + + llvm::SmallSet skipBBs; + skipBBs.insert(Loop->getCond()); + skipBBs.insert(Loop->getHeader()); + + worklist.push_back(body); + reachable.insert(body); + while (!worklist.empty()) { + BasicBlock *front = worklist.pop_back_val(); + for (BasicBlock *succ : successors(front)) { + if (reachable.count(succ) == 0) { + /// We need the check here to ensure that we don't run + /// infinitely if the CFG has a loop in it + /// i.e. the BB reaches itself directly or indirectly + worklist.push_back(succ); + if (!DT.dominates(exit, succ) && skipBBs.count(succ) == 0) + reachable.insert(succ); + } + } + } + + MDNode *N = MDNode::getDistinct(Ctx, MDString::get(Ctx, "llvm.access.group")); + for (auto BB : reachable) { + addSimdMetadata(BB, {N}); + } +} + /// Create the TargetMachine object to query the backend for optimization /// preferences. ///