diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2583,6 +2583,32 @@
 }
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+  // Take the OpenMPIRBuilder path only for a clause-free directive: that path
+  // does not lower any simd clause yet, so a directive carrying clauses falls
+  // through to the existing codegen below instead of silently dropping them.
+  if (CGM.getLangOpts().OpenMPIRBuilder && S.clauses().empty()) {
+    auto &&CodeGenIRBuilder = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+      // Emit the associated statement and get its loop representation.
+      llvm::DebugLoc DL = CGF.SourceLocToDebugLoc(S.getBeginLoc());
+      const Stmt *Inner = S.getRawStmt();
+      llvm::CanonicalLoopInfo *CLI =
+          CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
+
+      // Add SIMD specific metadata.
+      llvm::OpenMPIRBuilder &OMPBuilder =
+          CGF.CGM.getOpenMPRuntime().getOMPBuilder();
+      OMPBuilder.createSIMDLoop(DL, CLI);
+    };
+    {
+      auto LPCRegion =
+          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+      OMPLexicalScope Scope(*this, S, OMPD_unknown);
+      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
+                                                  CodeGenIRBuilder);
+    }
+    return;
+  }
+
   ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
   OMPFirstScanLoop = true;
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/test/OpenMP/simd_codegen_irbuilder.cpp b/clang/test/OpenMP/simd_codegen_irbuilder.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/simd_codegen_irbuilder.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+  int a, b;
+};
+
+void simple(float *a, float *b) {
+  S s, *p;
+  int j = 0;
+#pragma omp simd
+  for (int i = 3; i < 32; i += 5) {
+    // llvm.access.group test
+    // CHECK: omp_loop.body: ; preds = %omp_loop.cond
+    // CHECK-NEXT: call void @__captured_stmt.1(i32* %i, i32 %omp_loop.iv, %struct.anon.0* %agg.captured1), !llvm.access.group !3
+    // CHECK-NEXT: %3 = load float*, float** %b.addr, align 8, !llvm.access.group !3
+    // CHECK-NEXT: %4 = load i32, i32* %i, align 4, !llvm.access.group !3
+    // CHECK-NEXT: %idxprom = sext i32 %4 to i64
+    // CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %3, i64 %idxprom
+    // CHECK-NEXT: %5 = load float, float* %arrayidx, align 4, !llvm.access.group !3
+    // CHECK-NEXT: %a2 = getelementptr inbounds %struct.S, %struct.S* %s, i32 0, i32 0
+    // CHECK-NEXT: %6 = load i32, i32* %a2, align 4, !llvm.access.group !3
+    // CHECK-NEXT: %conv = sitofp i32 %6 to float
+    // CHECK-NEXT: %add = fadd float %5, %conv
+    // CHECK-NEXT: %7 = load %struct.S*, %struct.S** %p, align 8, !llvm.access.group !3
+    // CHECK-NEXT: %a3 = getelementptr inbounds %struct.S, %struct.S* %7, i32 0, i32 0
+    // CHECK-NEXT: %8 = load i32, i32* %a3, align 4, !llvm.access.group !3
+    // CHECK-NEXT: %conv4 = sitofp i32 %8 to float
+    // CHECK-NEXT: %add5 = fadd float %add, %conv4
+    // CHECK-NEXT: %9 = load float*, float** %a.addr, align 8, !llvm.access.group !3
+    // CHECK-NEXT: %10 = load i32, i32* %i, align 4, !llvm.access.group !3
+    // CHECK-NEXT: %idxprom6 = sext i32 %10 to i64
+    // CHECK-NEXT: %arrayidx7 = getelementptr inbounds float, float* %9, i64 %idxprom6
+    // CHECK-NEXT: store float %add5, float* %arrayidx7, align 4, !llvm.access.group !3
+    // llvm.loop test
+    // CHECK: %omp_loop.next = add nuw i32 %omp_loop.iv, 1
+    // CHECK-NEXT: br label %omp_loop.header, !llvm.loop !4
+    // CHECK: !3 = distinct !{}
+    // CHECK-NEXT: !4 = distinct !{!4, !5, !6}
+    // CHECK-NEXT: !5 = !{!"llvm.loop.parallel_accesses", !3}
+    // CHECK-NEXT: !6 = !{!"llvm.loop.vectorize.enable", i1 true}
+    a[i] = b[i] + s.a + p->a;
+  }
+}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -517,6 +517,17 @@
   void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
                          CanonicalLoopInfo **UnrolledCLI);
 
+  /// Add metadata to simd-ise a loop.
+  ///
+  /// Tags the memory accesses of \p Loop with a new access group and attaches
+  /// `llvm.loop.parallel_accesses` (naming that group) and
+  /// `llvm.loop.vectorize.enable` to the loop.
+  ///
+  /// \param DL   Debug location for instructions added by simd-isation;
+  ///             unused for now because only metadata is attached.
+  /// \param Loop The loop to simd-ise.
+  void createSIMDLoop(DebugLoc DL, CanonicalLoopInfo *Loop);
+
   /// Generator for '#omp flush'
   ///
   /// \param Loc The location where the flush directive was encountered
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2116,6 +2116,14 @@
   Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
 }
 
+/// Attach the access group \p AccessGroup to every instruction of \p Block
+/// that may read or write memory.
+static void addSIMDMetadata(BasicBlock *Block, MDNode *AccessGroup) {
+  for (Instruction &I : *Block)
+    if (I.mayReadOrWriteMemory())
+      I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
+}
+
 void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
   LLVMContext &Ctx = Builder.getContext();
   addLoopMetadata(
@@ -2131,6 +2139,48 @@
       });
 }
 
+void OpenMPIRBuilder::createSIMDLoop(DebugLoc, CanonicalLoopInfo *Loop) {
+  LLVMContext &Ctx = Builder.getContext();
+
+  // An access group has to be a distinct node without operands; one group is
+  // shared by every memory access of this loop so that the loop metadata
+  // below can refer to it.
+  MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
+
+  // The control blocks are tagged directly and are not traversed: following
+  // the edges of the condition or latch block would leave the body region.
+  SmallPtrSet<BasicBlock *, 8> Visited;
+  Visited.insert(Loop->getHeader());
+  Visited.insert(Loop->getCond());
+  Visited.insert(Loop->getLatch());
+  addSIMDMetadata(Loop->getHeader(), AccessGroup);
+  addSIMDMetadata(Loop->getCond(), AccessGroup);
+
+  // Tag every block reachable from the body entry, so that loop bodies made
+  // of several basic blocks are annotated completely.
+  SmallVector<BasicBlock *, 8> Worklist;
+  Worklist.push_back(Loop->getBody());
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+    if (!Visited.insert(BB).second)
+      continue;
+    addSIMDMetadata(BB, AccessGroup);
+    for (BasicBlock *Succ : successors(BB))
+      Worklist.push_back(Succ);
+  }
+
+  // `llvm.loop.parallel_accesses` must name the access group it vouches for,
+  // and `llvm.loop.vectorize.enable` takes an explicit i1 operand.
+  ConstantAsMetadata *BoolConst =
+      ConstantAsMetadata::get(ConstantInt::getTrue(Type::getInt1Ty(Ctx)));
+  addLoopMetadata(
+      Loop,
+      {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"),
+                         AccessGroup}),
+       MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
+                         BoolConst})});
+}
+
 /// Create the TargetMachine object to query the backend for optimization
 /// preferences.
 ///