diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -933,7 +933,7 @@ 8); // Using the computed layout, generate the actual block function. bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); - CodeGenFunction BlockCGF{CGM, true}; + CodeGenFunction BlockCGF{CGM, true, ParentFn}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); @@ -1588,6 +1588,7 @@ StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args, blockDecl->getLocation(), blockInfo.getBlockExpr()->getBody()->getBeginLoc()); + HandleCodeTransformations(blockDecl->getBody()); // Okay. Undo some of what StartFunction did. diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -212,7 +212,8 @@ setFunctionLinkage(GD, Fn); - CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo); + CodeGenFunction(*this, false, cast(GD.getDecl())) + .GenerateCode(GD, Fn, FnInfo); setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(cast(GD.getDecl()), Fn); return Fn; diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -1882,6 +1882,7 @@ StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc()); + LoopStack.initAsOutlined(ParentCGF.LoopStack); CurSEHParent = ParentCGF.CurSEHParent; CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn); diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -14,7 +14,9 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H #define LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H +#include "clang/Basic/Transform.h" #include 
"llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Value.h" @@ -24,12 +26,18 @@ class BasicBlock; class Instruction; class MDNode; +class LLVMContext; } // end namespace llvm namespace clang { class Attr; class ASTContext; +class Stmt; +class Transform; + namespace CodeGen { +class CGTransformedTree; +class CGDebugInfo; /// Attributes that may be specified on loops. struct LoopAttributes { @@ -82,10 +90,14 @@ /// Construct a new LoopInfo for the loop with entry Header. LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, - LoopInfo *Parent); + LoopInfo *Parent, CGTransformedTree *TN); /// Get the loop id metadata for this loop. - llvm::MDNode *getLoopID() const { return TempLoopID.get(); } + llvm::MDNode *getLoopID() const { + if (LoopMD) + return LoopMD; + return TempLoopID.get(); + } /// Get the header block of this loop. llvm::BasicBlock *getHeader() const { return Header; } @@ -100,9 +112,13 @@ /// been processed. void finish(); + CGTransformedTree *Syntactical; + private: /// Loop ID metadata. llvm::TempMDTuple TempLoopID; + llvm::MDNode *LoopMD = nullptr; + /// Header block of this loop. llvm::BasicBlock *Header; /// The attributes for this loop. @@ -193,17 +209,27 @@ public: LoopInfoStack() {} + ~LoopInfoStack(); + + CGTransformedTree *lookupTransformedNode(const Stmt *S); + + void initAsOutlined(LoopInfoStack &ParentLIS) { + StmtToTree = ParentLIS.StmtToTree; + } + + void initBuild(ASTContext &ASTCtx, const LangOptions &LangOpts, + llvm::LLVMContext &LLVMCtx, CGDebugInfo *DbgInfo, Stmt *Body); /// Begin a new structured loop. The set of staged attributes will be /// applied to the loop and then cleared. void push(llvm::BasicBlock *Header, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc); + const llvm::DebugLoc &EndLoc, const Stmt *LoopStmt); /// Begin a new structured loop. 
Stage attributes from the Attrs list. /// The staged attributes are applied to the loop and then cleared. void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, llvm::ArrayRef Attrs, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc); + const llvm::DebugLoc &EndLoc, const Stmt *LoopStmt); /// End the current loop. void pop(); @@ -280,6 +306,17 @@ LoopAttributes StagedAttrs; /// Stack of active loops. llvm::SmallVector, 4> Active; + + // CGTransformedTree *TransformedStructure = nullptr; + + /// Dictionary to find the TransformedNode representation for any loop. + llvm::DenseMap StmtToTree; + + /// Objects to free later. + /// @{ + llvm::SmallVector AllNodes; + llvm::SmallVector AllTransforms; + /// @} }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "CGLoopInfo.h" +#include "CGTransform.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" +#include "clang/Basic/LangOptions.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -455,14 +457,21 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, - LoopInfo *Parent) + LoopInfo *Parent, CGTransformedTree *TN) : Header(Header), Attrs(Attrs), StartLoc(StartLoc), EndLoc(EndLoc), Parent(Parent) { + if (TN) { + assert(TN->isCodeGenned() && "Emitted loop must be marked as code-genned"); + LoopMD = TN->makeLoopID(Header->getContext(), false); + AccGroup = TN->getAccessGroupOrNull(); + } + if (Attrs.IsParallel) { // Create an access group for this loop. 
LLVMContext &Ctx = Header->getContext(); - AccGroup = MDNode::getDistinct(Ctx, {}); + if (!AccGroup) + AccGroup = MDNode::getDistinct(Ctx, {}); } if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && @@ -477,9 +486,18 @@ !EndLoc) return; + assert(!LoopMD && + "#pragma clang transform is incompatible with loop attributes"); TempLoopID = MDNode::getTemporary(Header->getContext(), None); } +LoopInfoStack::~LoopInfoStack() { + for (auto N : AllNodes) + delete N; + for (auto T : AllTransforms) + delete T; +} + void LoopInfo::finish() { // We did not annotate the loop body instructions because there are no // attributes for this loop. @@ -563,10 +581,13 @@ } void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc) { - Active.emplace_back( - new LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, - Active.empty() ? nullptr : Active.back().get())); + const llvm::DebugLoc &EndLoc, + const clang::Stmt *LoopStmt) { + + LoopInfo *Parent = Active.empty() ? nullptr : Active.back().get(); + Active.emplace_back(new LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, + Parent, lookupTransformedNode(LoopStmt))); + // Clear the attributes so nested loops do not inherit them. StagedAttrs.clear(); } @@ -574,11 +595,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, ArrayRef Attrs, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc) { + const llvm::DebugLoc &EndLoc, + const clang::Stmt *LoopStmt) { // Identify loop hint attributes from Attrs. for (const auto *Attr : Attrs) { const LoopHintAttr *LH = dyn_cast(Attr); + const OpenCLUnrollHintAttr *OpenCLHint = dyn_cast(Attr); @@ -753,7 +776,7 @@ } /// Stage the attributes. 
- push(Header, StartLoc, EndLoc); + push(Header, StartLoc, EndLoc, LoopStmt); } void LoopInfoStack::pop() { @@ -762,6 +785,22 @@ Active.pop_back(); } +CGTransformedTree *LoopInfoStack::lookupTransformedNode(const clang::Stmt *S) { + const Stmt *Loop = getAssociatedLoop(S); + if (!Loop) + return nullptr; + return StmtToTree.lookup(Loop); +} + +void LoopInfoStack::initBuild(clang::ASTContext &ASTCtx, + const clang::LangOptions &LangOpts, + llvm::LLVMContext &LLVMCtx, CGDebugInfo *DbgInfo, + clang::Stmt *Body) { + CGTransformedTreeBuilder Builder(ASTCtx, LangOpts, LLVMCtx, AllNodes, + AllTransforms, DbgInfo); + Builder.computeTransformedStructure(Body, StmtToTree); +} + void LoopInfoStack::InsertHelper(Instruction *I) const { if (I->mayReadOrWriteMemory()) { SmallVector AccessGroups; diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp --- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -464,6 +464,7 @@ II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr, SC_PrivateExtern, false, false); CodeGenFunction NewCGF(CGM); + NewCGF.LoopStack.initAsOutlined(NewCGF.LoopStack); setCGF(&NewCGF); CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -239,7 +239,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emits object of ident_t type with info for source location. /// \param Flags Flags for OpenMP location. @@ -678,7 +679,8 @@ /// found along the way. /// \param S Starting statement. /// \param ParentName Name of the function declaration that is being scanned. 
- void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName); + void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName, + const FunctionDecl *ParentFn); /// Build type kmp_routine_entry_t (if not built yet). void emitKmpRoutineEntryT(QualType KmpInt32Ty); @@ -848,7 +850,8 @@ /// \param CodeGen Code generation sequence for the \a D directive. virtual llvm::Function *emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emits outlined function for the specified OpenMP teams directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, @@ -860,7 +863,8 @@ /// \param CodeGen Code generation sequence for the \a D directive. virtual llvm::Function *emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emits outlined function for the OpenMP task directive \a D. This /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* @@ -881,7 +885,7 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts); + bool Tied, unsigned &NumberOfParts, const FunctionDecl *ParentFn); /// Cleans up references to the objects in finished function. /// @@ -1432,7 +1436,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emit the target offloading code associated with \a D. 
The emitted /// code attempts offloading the execution to the device, an the event of @@ -1679,11 +1684,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitParallelOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits outlined function for the specified OpenMP teams directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, @@ -1693,11 +1697,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits outlined function for the OpenMP task directive \a D. 
This /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* @@ -1718,7 +1721,8 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) override; + bool Tied, unsigned &NumberOfParts, + const FunctionDecl *ParentFn) override; /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -2122,7 +2126,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) override; + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1442,10 +1442,11 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, - const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { + const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { assert(ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *"); - CodeGenFunction CGF(CGM, true); + CodeGenFunction CGF(CGM, true, ParentFn); bool HasCancel = false; if (const auto *OPD = dyn_cast(&D)) HasCancel = OPD->hasCancel(); @@ -1471,25 +1472,29 @@ llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); return emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen, + ParentFn); } llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); return emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen, + ParentFn); } llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) { + bool Tied, unsigned &NumberOfParts, const FunctionDecl *ParentFn) { auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, PrePostActionTy &) { llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); @@ -1511,7 +1516,7 @@ : OMPD_task; const CapturedStmt *CS = D.getCapturedStmt(Region); const auto *TD = dyn_cast(&D); - CodeGenFunction CGF(CGM, true); + CodeGenFunction CGF(CGM, true, ParentFn); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, TD ? 
TD->hasCancel() : false, Action); @@ -6474,17 +6479,19 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { assert(!ParentName.empty() && "Invalid target region parent name!"); HasEmittedTargetRegion = true; emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, - IsOffloadEntry, CodeGen); + IsOffloadEntry, CodeGen, ParentFn); } void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { // Create a unique name for the entry function using the source location // information of the current target region. 
The name will be something like: // @@ -6508,7 +6515,7 @@ const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - CodeGenFunction CGF(CGM, true); + CodeGenFunction CGF(CGM, true, ParentFn); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -9456,8 +9463,8 @@ } } -void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, - StringRef ParentName) { +void CGOpenMPRuntime::scanForTargetRegionsFunctions( + const Stmt *S, StringRef ParentName, const FunctionDecl *ParentFn) { if (!S) return; @@ -9483,47 +9490,51 @@ switch (E.getDirectiveKind()) { case OMPD_target: - CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, - cast(E)); + CodeGenFunction::EmitOMPTargetDeviceFunction( + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_parallel: CodeGenFunction::EmitOMPTargetParallelDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_teams: CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_teams_distribute: CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), + ParentFn); break; case OMPD_target_teams_distribute_simd: CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), + ParentFn); break; case OMPD_target_parallel_for: CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_parallel_for_simd: CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), + ParentFn); break; case OMPD_target_simd: CodeGenFunction::EmitOMPTargetSimdDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case 
OMPD_target_teams_distribute_parallel_for: CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CGM, ParentName, - cast(E)); + cast(E), ParentFn); break; case OMPD_target_teams_distribute_parallel_for_simd: CodeGenFunction:: EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( CGM, ParentName, - cast(E)); + cast(E), + ParentFn); break; case OMPD_parallel: case OMPD_for: @@ -9587,7 +9598,7 @@ return; scanForTargetRegionsFunctions( - E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); + E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName, ParentFn); return; } @@ -9597,7 +9608,7 @@ // Keep looking for target regions recursively. for (const Stmt *II : S->children()) - scanForTargetRegionsFunctions(II, ParentName); + scanForTargetRegionsFunctions(II, ParentName, ParentFn); } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { @@ -9618,7 +9629,7 @@ StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. if (const auto *FD = dyn_cast(VD)) { - scanForTargetRegionsFunctions(FD->getBody(), Name); + scanForTargetRegionsFunctions(FD->getBody(), Name, FD); Optional DevTy = OMPDeclareTargetDeclAttr::getDeviceType(FD); // Do not emit device_type(nohost) functions for the host. 
@@ -9643,12 +9654,12 @@ for (const CXXConstructorDecl *Ctor : RD->ctors()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); - scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); + scanForTargetRegionsFunctions(Ctor->getBody(), ParentName, Ctor); } if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); - scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); + scanForTargetRegionsFunctions(Dtor->getBody(), ParentName, Dtor); } } @@ -11320,13 +11331,15 @@ llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { llvm_unreachable("Not supported in SIMD-only mode"); } llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -11334,7 +11347,7 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) { + bool Tied, unsigned &NumberOfParts, const FunctionDecl *ParentFn) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -11534,7 +11547,8 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { 
llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -110,7 +110,8 @@ void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emit outlined function specialized for the Single Program /// Multiple Data programming model for applicable target directives on the @@ -126,7 +127,8 @@ void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emit outlined function for 'target' directive on the NVPTX /// device. @@ -142,7 +144,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) override; + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -240,11 +243,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
- llvm::Function * - emitParallelOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits inlined function for the specified OpenMP teams // directive. @@ -255,11 +257,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1150,7 +1150,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) { + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); EntryFunctionState EST; WorkerFunctionState WST(CGM, D.getBeginLoc()); @@ -1194,7 +1195,7 @@ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, - IsOffloadEntry, CodeGen); + IsOffloadEntry, 
CodeGen, ParentFn); IsInTTDRegion = false; // Now change the name of the worker function to correspond to this target @@ -1282,7 +1283,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) { + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { ExecutionRuntimeModesRAII ModeRAII( CurrentExecutionMode, RequiresFullRuntime, CGM.getLangOpts().OpenMPCUDAForceFullRuntime || @@ -1324,7 +1326,7 @@ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, - IsOffloadEntry, CodeGen); + IsOffloadEntry, CodeGen, ParentFn); IsInTTDRegion = false; } @@ -1866,7 +1868,8 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { if (!IsOffloadEntry) // Nothing to do. return; @@ -1875,10 +1878,10 @@ bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D); if (Mode) emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, - CodeGen); + CodeGen, ParentFn); else emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, - CodeGen); + CodeGen, ParentFn); setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } @@ -1960,7 +1963,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { // Emit target region as a standalone region. 
class NVPTXPrePostActionTy : public PrePostActionTy { bool &IsInParallelRegion; @@ -1984,7 +1988,7 @@ IsInTargetMasterThreadRegion = false; auto *OutlinedFun = cast(CGOpenMPRuntime::emitParallelOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen)); + D, ThreadIDVar, InnermostKind, CodeGen, ParentFn)); if (CGM.getLangOpts().Optimize) { OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); @@ -2042,7 +2046,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { SourceLocation Loc = D.getBeginLoc(); const RecordDecl *GlobalizedRD = nullptr; @@ -2105,7 +2110,7 @@ } Action(Loc, GlobalizedRD, MappedDeclsFields); CodeGen.setAction(Action); llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen); + D, ThreadIDVar, InnermostKind, CodeGen, ParentFn); if (CGM.getLangOpts().Optimize) { OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -359,7 +359,7 @@ cast(*S)); break; case Stmt::TransformExecutableDirectiveClass: - llvm_unreachable("not implemented"); + EmitTransformExecutableDirective(cast(*S)); break; } } @@ -727,7 +727,7 @@ const SourceRange &R = S.getSourceRange(); LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + SourceLocToDebugLoc(R.getEnd()), &S); // Create an exit block for when the condition fails, which will // also become the break target. 
@@ -829,7 +829,7 @@ const SourceRange &R = S.getSourceRange(); LoopStack.push(LoopBody, CGM.getContext(), DoAttrs, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + SourceLocToDebugLoc(R.getEnd()), &S); // C99 6.8.5.2: "The evaluation of the controlling expression takes place // after each execution of the loop body." @@ -887,7 +887,7 @@ const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + SourceLocToDebugLoc(R.getEnd()), &S); // If the for loop doesn't have an increment we can just use the // condition as the continue block. Otherwise we'll need to create @@ -988,7 +988,7 @@ const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + SourceLocToDebugLoc(R.getEnd()), &S); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -2434,6 +2434,7 @@ // Generate the function. StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), CD->getBody()->getBeginLoc()); + HandleCodeTransformations(&S); // Set the context parameter in CapturedStmtInfo. Address DeclPtr = GetAddrOfLocalVar(CD->getContextParam()); CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -485,6 +485,7 @@ // Generate the function. 
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); + CGF.HandleCodeTransformations(CD->getBody()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (const FieldDecl *FD : RD->fields()) { @@ -601,7 +602,7 @@ FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, /*RegisterCastedArgsOnly=*/true, CapturedStmtInfo->getHelperName()); - CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); + CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true, ParentFn); WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; Args.clear(); LocalAddrs.clear(); @@ -1278,7 +1279,8 @@ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen, + CGF.getParentFn()); if (const auto *NumThreadsClause = S.getSingleClause()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); llvm::Value *NumThreads = @@ -1434,7 +1436,7 @@ EmitBlock(CondBlock); const SourceRange R = S.getSourceRange(); LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + SourceLocToDebugLoc(R.getEnd()), &S); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. 
@@ -1953,7 +1955,7 @@ EmitBlock(CondBlock); const SourceRange R = S.getSourceRange(); LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + SourceLocToDebugLoc(R.getEnd()), &S); llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { @@ -2355,7 +2357,8 @@ } void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( - CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { + CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S, + const FunctionDecl *ParentFn) { // Emit SPMD target parallel for region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitOMPSimdRegion(CGF, S, Action); @@ -2364,7 +2367,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -3246,7 +3249,7 @@ }; llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, - Data.NumberOfParts); + Data.NumberOfParts, ParentFn); OMPLexicalScope Scope(*this, S, llvm::None, !isOpenMPParallelDirective(S.getDirectiveKind()) && !isOpenMPSimdDirective(S.getDirectiveKind())); @@ -3390,7 +3393,7 @@ }; llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, - Data.NumberOfParts); + Data.NumberOfParts, ParentFn); llvm::APInt TrueOrFalse(32, S.hasClausesOfKind() ? 
1 : 0); IntegerLiteral IfCond(getContext(), TrueOrFalse, getContext().getIntTypeForBitwidth(32, /*Signed=*/0), @@ -3732,9 +3735,10 @@ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } -static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, - const CapturedStmt *S) { - CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); +static llvm::Function * +emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S, + const FunctionDecl *ParentFn) { + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true, ParentFn); CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; CGF.CapturedStmtInfo = &CapStmtInfo; llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); @@ -3757,7 +3761,8 @@ if (C) { llvm::SmallVector CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + llvm::Function *OutlinedFn = + emitOutlinedOrderedFunction(CGM, CS, ParentFn); CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars); } else { @@ -4302,8 +4307,8 @@ CGM.getMangledName(GlobalDecl(cast(CGF.CurFuncDecl))); // Emit target region as a standalone region. 
- CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, - IsOffloadEntry, CodeGen); + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen, CGF.getParentFn()); OMPLexicalScope Scope(CGF, S, OMPD_task); auto &&SizeEmitter = [IsOffloadEntry](CodeGenFunction &CGF, @@ -4335,9 +4340,9 @@ CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); } -void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, - StringRef ParentName, - const OMPTargetDirective &S) { +void CodeGenFunction::EmitOMPTargetDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S, + const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetRegion(CGF, S, Action); }; @@ -4345,7 +4350,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4363,7 +4368,8 @@ const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen, + CGF.getParentFn()); const auto *NT = S.getSingleClause(); const auto *TL = S.getSingleClause(); @@ -4422,8 +4428,8 @@ } void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( - CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDirective &S) { + CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S, + const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetTeamsRegion(CGF, Action, S); }; @@ -4431,7 +4437,7 @@ llvm::Constant *Addr; // Emit target 
region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4469,7 +4475,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeDirective &S) { + const OMPTargetTeamsDistributeDirective &S, const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetTeamsDistributeRegion(CGF, Action, S); }; @@ -4477,7 +4483,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4515,7 +4521,8 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeSimdDirective &S) { + const OMPTargetTeamsDistributeSimdDirective &S, + const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetTeamsDistributeSimdRegion(CGF, Action, S); }; @@ -4523,7 +4530,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4655,7 +4662,8 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForDirective &S) { + const OMPTargetTeamsDistributeParallelForDirective &S, + const FunctionDecl *ParentFn) { // Emit SPMD target teams distribute parallel for region as a standalone // region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -4665,7 +4673,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4707,7 +4715,8 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + const OMPTargetTeamsDistributeParallelForSimdDirective &S, + const FunctionDecl *ParentFn) { // Emit SPMD target teams distribute parallel for simd region as a standalone // region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -4717,7 +4726,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4985,7 +4994,7 @@ void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelDirective &S) { + const OMPTargetParallelDirective &S, const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetParallelRegion(CGF, S, Action); }; @@ -4993,7 +5002,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -5024,7 +5033,7 @@ void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForDirective &S) { + const OMPTargetParallelForDirective &S, const FunctionDecl *ParentFn) { // Emit SPMD target parallel for region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetParallelForRegion(CGF, S, Action); @@ -5033,7 +5042,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -5063,7 +5072,7 @@ void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForSimdDirective &S) { + const OMPTargetParallelForSimdDirective &S, const FunctionDecl *ParentFn) { // Emit SPMD target parallel for region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetParallelForSimdRegion(CGF, S, Action); @@ -5072,7 +5081,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } diff --git a/clang/lib/CodeGen/CGTransform.h b/clang/lib/CodeGen/CGTransform.h new file mode 100644 --- /dev/null +++ b/clang/lib/CodeGen/CGTransform.h @@ -0,0 +1,142 @@ +//===---- CGTransform.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Emitting metadata for loop transformations. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGTRANSFORM_H +#define LLVM_CLANG_LIB_CODEGEN_CGTRANSFORM_H + +#include "clang/Analysis/TransformedTree.h" +#include "llvm/IR/DebugLoc.h" + +namespace clang { +namespace CodeGen { +class CGDebugInfo; +class CGTransformedTreeBuilder; + +class CGTransformedTree : public TransformedTree { + friend class CGTransformedTreeBuilder; + friend class TransformedTree; + using BaseTy = TransformedTree; + using NodeTy = CGTransformedTree; + + BaseTy &getBase() { return *this; } + const BaseTy &getBase() const { return *this; } + + llvm::DebugLoc BeginLoc; + llvm::DebugLoc EndLoc; + + llvm::MDNode *AccessGroup = nullptr; + llvm::SmallVector ParallelAccessGroups; + + bool Finalized = false; + +public: + CGTransformedTree(llvm::ArrayRef SubLoops, NodeTy *BasedOn, + clang::Stmt *Original, int FollowupRole) + : TransformedTree(SubLoops, BasedOn, Original, FollowupRole), + IsCodeGenned(Original) {} + + bool IsDefault = true; + bool DisableHeuristic = false; + bool IsCodeGenned; + + llvm::SmallVector Attributes; + llvm::SmallVector Transforms; + llvm::SmallVector, 4> FollowupAttributes; + + bool collectLoopProperties(llvm::SmallVectorImpl &Props); + void addAttribute(bool Inherited, llvm::Metadata *Node); + +public: + void markNondefault() { IsDefault = false; } + void markDisableHeuristic() { DisableHeuristic = true; } + + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::ArrayRef Vals); + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name); + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name, bool Val); + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name, int Val); + + llvm::MDNode *getAccessGroupOrNull() { return AccessGroup; } + + llvm::MDNode *makeAccessGroup(llvm::LLVMContext &LLVMCtx); + + void + getOrCreateAccessGroups(llvm::LLVMContext 
&LLVMCtx, + llvm::SmallVectorImpl &AccessGroups); + void collectAccessGroups(llvm::LLVMContext &LLVMCtx, + llvm::SmallVectorImpl &AccessGroups); + + void finalize(llvm::LLVMContext &LLVMCtx) { Finalized = true; } + + llvm::ArrayRef getParallelAccessGroups() const { + assert(Finalized); + return ParallelAccessGroups; + } + + llvm::MDNode *makeLoopID(llvm::LLVMContext &Ctx, bool HasAllDisableNonforced); + + bool isCodeGenned() const { return IsCodeGenned; } +}; + +class CGTransformedTreeBuilder + : public TransformedTreeBuilder { + using BaseTy = + TransformedTreeBuilder; + using NodeTy = CGTransformedTree; + + BaseTy &getBase() { return *this; } + const BaseTy &getBase() const { return *this; } + + llvm::LLVMContext &LLVMCtx; + CGDebugInfo *DbgInfo; + +public: + CGTransformedTreeBuilder(ASTContext &ASTCtx, const LangOptions &LangOpts, + llvm::LLVMContext &LLVMCtx, + llvm::SmallVectorImpl &AllNodes, + llvm::SmallVectorImpl &AllTransforms, + CGDebugInfo *DbgInfo) + : TransformedTreeBuilder(ASTCtx, LangOpts, AllNodes, AllTransforms), + LLVMCtx(LLVMCtx), DbgInfo(DbgInfo) {} + + // Ignore any diagnostic and its arguments. 
+ struct DummyDiag { + template DummyDiag operator<<(const T &) const { return {}; } + }; + DummyDiag Diag(SourceLocation Loc, unsigned DiagID) { return {}; } + + void applyOriginal(CGTransformedTree *L); + + void inheritLoopAttributes(CGTransformedTree *Dst, CGTransformedTree *Src, + bool IsMeta, bool IsSuccessor); + + void applyUnroll(LoopUnrollTransform *Trans, CGTransformedTree *OriginalLoop); + void applyUnrollAndJam(LoopUnrollAndJamTransform *Trans, + CGTransformedTree *OuterLoop, + CGTransformedTree *InnerLoop); + void applyDistribution(LoopDistributionTransform *Trans, + CGTransformedTree *OriginalLoop); + void applyVectorization(LoopVectorizationTransform *Trans, + CGTransformedTree *InputLoop); + void applyInterleaving(LoopInterleavingTransform *Trans, + CGTransformedTree *InputLoop); + + void finalize(NodeTy *Root); +}; + +} // namespace CodeGen +} // namespace clang +#endif /* LLVM_CLANG_LIB_CODEGEN_CGTRANSFORM_H */ diff --git a/clang/lib/CodeGen/CGTransform.cpp b/clang/lib/CodeGen/CGTransform.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/CodeGen/CGTransform.cpp @@ -0,0 +1,395 @@ +//===---- CGTransform.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Emitting metadata for loop transformations. 
+// +//===----------------------------------------------------------------------===// + +#include "CGTransform.h" +#include "CGDebugInfo.h" +#include "CGLoopInfo.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtTransform.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" + +using namespace clang; +using namespace clang::CodeGen; +using namespace llvm; + +void CodeGenFunction::HandleCodeTransformations(const Stmt *Body) { + if (!getParentFn()) { + // Transformations not supported for e.g. Objective-C + return; + } + + assert(CurFn && "Must be called after StartFunction"); + assert(Body); + + LoopStack.initBuild(getContext(), getLangOpts(), getLLVMContext(), DebugInfo, + getParentFn()->getBody()); +} + +void CodeGenFunction::EmitTransformExecutableDirective( + const TransformExecutableDirective &D) { + EmitStmt(D.getAssociated()); +} + +bool CGTransformedTree::collectLoopProperties( + llvm::SmallVectorImpl &Props) { + for (Metadata *M : this->Attributes) + Props.push_back(M); + for (Metadata *M : this->Transforms) + Props.push_back(M); + return !Props.empty(); +} + +void CGTransformedTree::addAttribute(bool Inherited, llvm::Metadata *Node) { + assert(!Finalized); + if (Inherited) + Attributes.push_back(Node); + else + Transforms.push_back(Node); +} + +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::ArrayRef Vals) { + addAttribute(Inherited, MDNode::get(LLVMCtx, Vals)); +} + +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name) { + addAttribute(LLVMCtx, Inherited, {MDString::get(LLVMCtx, Name)}); +} + +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name, bool Val) { + addAttribute(LLVMCtx, Inherited, + {MDString::get(LLVMCtx, Name), + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt1Ty(LLVMCtx), Val))}); +} +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, 
bool Inherited, + llvm::StringRef Name, int Val) { + addAttribute(LLVMCtx, Inherited, + {MDString::get(LLVMCtx, Name), + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt32Ty(LLVMCtx), Val))}); +} + +llvm::MDNode *CGTransformedTree::makeAccessGroup(llvm::LLVMContext &LLVMCtx) { + if (!AccessGroup) { + if (IsCodeGenned) + AccessGroup = MDNode::getDistinct(LLVMCtx, {}); + } + assert(!AccessGroup == !IsCodeGenned && + "Non-codegenned loop must not have an access group"); + return AccessGroup; +} + +void CGTransformedTree::getOrCreateAccessGroups( + llvm::LLVMContext &LLVMCtx, + llvm::SmallVectorImpl &AccessGroups) { + assert( + (IsCodeGenned || !getOriginal()) && + "Original loop should not be emitted if its transformed successors are"); + + if (IsCodeGenned) { + if (!AccessGroup) + AccessGroup = MDNode::getDistinct(LLVMCtx, {}); + AccessGroups.push_back(AccessGroup); + return; + } + + getBasedOn()->getOrCreateAccessGroups(LLVMCtx, AccessGroups); +} + +void CGTransformedTree::collectAccessGroups( + llvm::LLVMContext &LLVMCtx, + llvm::SmallVectorImpl &AccessGroups) { + auto AccGroup = makeAccessGroup(LLVMCtx); + if (AccGroup) + AccessGroups.push_back(AccGroup); + if (BasedOn) + BasedOn->collectAccessGroups(LLVMCtx, AccessGroups); +} + +llvm::MDNode *CGTransformedTree::makeLoopID(llvm::LLVMContext &Ctx, + bool HasAllDisableNonforced) { + assert(Finalized && isCodeGenned()); + if (IsDefault && (!DisableHeuristic || HasAllDisableNonforced)) + return nullptr; + + SmallVector Args; + + // Reserve operand 0 for loop id self reference. + Args.push_back(nullptr); + + if (BeginLoc) { + Args.push_back(BeginLoc.getAsMDNode()); + + // If we also have a valid end debug location for the loop, add it. 
+ if (EndLoc) + Args.push_back(EndLoc.getAsMDNode()); + } + + if (!ParallelAccessGroups.empty()) { + SmallVector ArgOpts; + ArgOpts.reserve(ParallelAccessGroups.size()); + ArgOpts.push_back(MDString::get(Ctx, "llvm.loop.parallel_accesses")); + ArgOpts.insert(ArgOpts.end(), ParallelAccessGroups.begin(), + ParallelAccessGroups.end()); + Args.push_back(MDNode::get(Ctx, ArgOpts)); + } + + collectLoopProperties(Args); + + bool AllIsDisableHeuristic = false; + bool OtherIsNondefault = false; + for (auto P : FollowupAttributes) { + int Role = P.second->FollowupRole; + if (TransformedBy->isMetaRole(Role)) { + if (P.second->DisableHeuristic) + AllIsDisableHeuristic = true; + } else { + if (!P.second->IsDefault) + OtherIsNondefault = true; + } + } + + for (auto P : FollowupAttributes) { + StringRef FollowupName = P.first; + NodeTy *FollowupNode = P.second; + llvm::MDNode *FollowupId; + if (TransformedBy->isMetaRole(FollowupNode->FollowupRole)) { + if (OtherIsNondefault) + FollowupNode->markNondefault(); + FollowupId = FollowupNode->makeLoopID(Ctx, false); + } else { + FollowupId = FollowupNode->makeLoopID(Ctx, AllIsDisableHeuristic); + } + if (!FollowupId) + continue; + + Args.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, FollowupName), FollowupId})); + } + + if (DisableHeuristic && !HasAllDisableNonforced) + Args.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.disable_nonforced")})); + + // No need for an MDNode if it is empty. + if (Args.size() <= 1) + return nullptr; + + // Set the first operand to itself. 
+ MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + return LoopID; +} + +void CGTransformedTreeBuilder::applyOriginal(CGTransformedTree *L) { + if (!DbgInfo) + return; + + L->BeginLoc = DbgInfo->SourceLocToDebugLoc(L->getOriginal()->getBeginLoc()); + L->EndLoc = DbgInfo->SourceLocToDebugLoc(L->getOriginal()->getEndLoc()); +} + +void CGTransformedTreeBuilder::inheritLoopAttributes(CGTransformedTree *Dst, + CGTransformedTree *Src, + bool IsMeta, + bool IsSuccessor) { + Dst->BeginLoc = Src->BeginLoc; + Dst->EndLoc = Src->EndLoc; + + if (!IsMeta) + for (llvm::Metadata *A : Src->Attributes) + Dst->Attributes.push_back(A); +} + +void CGTransformedTreeBuilder::applyUnroll(LoopUnrollTransform *Trans, + CGTransformedTree *OriginalLoop) { + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll.enable"); + + if (Trans->isFull()) { + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll.full"); + } else { + int Factor = Trans->getFactor(); + if (Factor > 0) + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll.count", + Factor); + } + + for (CGTransformedTree *F : OriginalLoop->Followups) { + switch (F->FollowupRole) { + case LoopUnrollTransform::FollowupAll: + OriginalLoop->FollowupAttributes.emplace_back(LLVMLoopUnrollFollowupAll, + F); + F->markDisableHeuristic(); + break; + case LoopUnrollTransform::FollowupUnrolled: + OriginalLoop->FollowupAttributes.emplace_back( + LLVMLoopUnrollFollowupUnrolled, F); + break; + case LoopUnrollTransform::FollowupRemainder: + OriginalLoop->FollowupAttributes.emplace_back( + LLVMLoopUnrollFollowupRemainder, F); + break; + } + } + + OriginalLoop->markNondefault(); + OriginalLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyUnrollAndJam( + LoopUnrollAndJamTransform *Trans, CGTransformedTree *OuterLoop, + CGTransformedTree *InnerLoop) { + OuterLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll_and_jam.enable"); + + int Factor = Trans->getFactor(); + if 
(Factor > 0) + OuterLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll_and_jam.count", + Factor); + + for (CGTransformedTree *F : OuterLoop->Followups) { + switch (F->FollowupRole) { + case LoopUnrollAndJamTransform::FollowupAll: + OuterLoop->FollowupAttributes.emplace_back( + "llvm.loop.unroll_and_jam.followup_all", F); + F->markDisableHeuristic(); + break; + case LoopUnrollAndJamTransform::FollowupOuter: + OuterLoop->FollowupAttributes.emplace_back( + "llvm.loop.unroll_and_jam.followup_outer", F); + break; + case LoopUnrollAndJamTransform::FollowupInner: + // LLVM's LoopUnrollAndJam pass expects the followup attributes for the + // inner loop to be attached to the outer loop. + OuterLoop->FollowupAttributes.emplace_back( + "llvm.loop.unroll_and_jam.followup_inner", F); + break; + } + } + + OuterLoop->markNondefault(); + OuterLoop->markDisableHeuristic(); + InnerLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyDistribution( + LoopDistributionTransform *Trans, CGTransformedTree *OriginalLoop) { + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.distribute.enable", + true); + + for (CGTransformedTree *F : OriginalLoop->Followups) { + switch (F->FollowupRole) { + case LoopDistributionTransform::FollowupAll: + OriginalLoop->FollowupAttributes.emplace_back( + "llvm.loop.distribute.followup_all", F); + F->markDisableHeuristic(); + break; + } + } + + OriginalLoop->markNondefault(); + OriginalLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyVectorization( + LoopVectorizationTransform *Trans, CGTransformedTree *MainLoop) { + // Enable vectorization, disable interleaving. + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.vectorize.enable", true); + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.interleave.count", 1); + + // If SIMD width is specified, forward it. 
+ int Width = Trans->getWidth(); + if (Width > 0) + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.vectorize.width", Width); + + for (CGTransformedTree *F : MainLoop->Followups) { + switch (F->FollowupRole) { + case LoopVectorizationTransform ::FollowupAll: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_all", F); + F->markDisableHeuristic(); + break; + case LoopVectorizationTransform ::FollowupVectorized: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_vectorized", F); + // F->markDisableHeuristic(); + break; + case LoopVectorizationTransform ::FollowupEpilogue: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_epilogue", F); + // F->markDisableHeuristic(); + break; + } + } + + MainLoop->markNondefault(); + MainLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyInterleaving( + LoopInterleavingTransform *Trans, CGTransformedTree *MainLoop) { + // Enable the LoopVectorize pass, but explicitly disable vectorization to only + // apply interleaving. + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.vectorize.enable", true); + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.vectorize.width", 1); + + // If interleave factor is specified, forward it. 
+ int Factor = Trans->getFactor(); + if (Factor > 0) + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.interleave.count", + Factor); + + for (CGTransformedTree *F : MainLoop->Followups) { + switch (F->FollowupRole) { + case LoopInterleavingTransform ::FollowupAll: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_all", F); + F->markDisableHeuristic(); + break; + case LoopInterleavingTransform ::FollowupInterleaved: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_vectorized", F); + break; + case LoopInterleavingTransform ::FollowupEpilogue: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_epilogue", F); + break; + } + } + + MainLoop->markNondefault(); + MainLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::finalize(NodeTy *Root) { + SmallVector Worklist; + SmallSet Visited; + Worklist.push_back(Root); + + while (!Worklist.empty()) { + auto *N = Worklist.pop_back_val(); + auto It = Visited.insert(N); + if (!It.second) + continue; + + N->finalize(LLVMCtx); + + for (auto SubLoop : N->getSubLoops()) + Worklist.push_back(SubLoop); + + for (auto Followup : N->getFollowups()) + Worklist.push_back(Followup); + } +} diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -73,6 +73,7 @@ CGRecordLayoutBuilder.cpp CGStmt.cpp CGStmtOpenMP.cpp + CGTransform.cpp CGVTT.cpp CGVTables.cpp CodeGenABITypes.cpp diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -96,6 +96,7 @@ class TargetCodeGenInfo; struct OMPTaskDataTy; struct CGCoroData; +class CGTransformedTree; /// The kind of evaluation to perform on values of a particular /// type. 
Basically, is the code in CGExprScalar, CGExprComplex, or @@ -1620,7 +1621,8 @@ llvm::Function *Fn); public: - CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext = false); + CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext = false, + const FunctionDecl *ParentFn = nullptr); ~CodeGenFunction(); CodeGenTypes &getTypes() const { return CGM.getTypes(); } @@ -2943,6 +2945,9 @@ llvm::Value *EmitSEHExceptionInfo(); llvm::Value *EmitSEHAbnormalTermination(); + void HandleCodeTransformations(const Stmt *Body); + void EmitTransformExecutableDirective(const TransformExecutableDirective &D); + /// Emit simple code for OpenMP directives in Simd-only mode. void EmitSimpleOMPExecutableDirective(const OMPExecutableDirective &D); @@ -3202,43 +3207,50 @@ /// Emit device code for the target directive. static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetDirective &S); + const OMPTargetDirective &S, + const FunctionDecl *ParentFn); static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelDirective &S); + const OMPTargetParallelDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target parallel for directive. static void EmitOMPTargetParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForDirective &S); + const OMPTargetParallelForDirective &S, const FunctionDecl *ParentFn); /// Emit device code for the target parallel for simd directive. static void EmitOMPTargetParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForSimdDirective &S); + const OMPTargetParallelForSimdDirective &S, const FunctionDecl *ParentFn); /// Emit device code for the target teams directive. 
- static void - EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDirective &S); + static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetTeamsDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target teams distribute directive. static void EmitOMPTargetTeamsDistributeDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeDirective &S); + const OMPTargetTeamsDistributeDirective &S, const FunctionDecl *ParentFn); /// Emit device code for the target teams distribute simd directive. static void EmitOMPTargetTeamsDistributeSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeSimdDirective &S); + const OMPTargetTeamsDistributeSimdDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target simd directive. static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetSimdDirective &S); + const OMPTargetSimdDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target teams distribute parallel for simd /// directive. static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForSimdDirective &S); + const OMPTargetTeamsDistributeParallelForSimdDirective &S, + const FunctionDecl *ParentFn); static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForDirective &S); + const OMPTargetTeamsDistributeParallelForDirective &S, + const FunctionDecl *ParentFn); /// Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. 
@@ -4383,6 +4395,11 @@ llvm::Value *EmitX86CpuSupports(uint64_t Mask); llvm::Value *EmitX86CpuInit(); llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); + + const FunctionDecl *ParentFn = nullptr; + +public: + const FunctionDecl *getParentFn() const { return ParentFn; } }; inline DominatingLLVMValue::saved_type diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -60,13 +60,15 @@ return CGOpts.OptimizationLevel != 0; } -CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) +CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext, + const FunctionDecl *ParentFn) : CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()), Builder(cgm, cgm.getModule().getContext(), llvm::ConstantFolder(), CGBuilderInserterTy(this)), SanOpts(CGM.getLangOpts().Sanitize), DebugInfo(CGM.getModuleDebugInfo()), - PGO(cgm), ShouldEmitLifetimeMarkers(shouldEmitLifetimeMarkers( - CGM.getCodeGenOpts(), CGM.getLangOpts())) { + ParentFn(ParentFn), PGO(cgm), + ShouldEmitLifetimeMarkers( + shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) { if (!suppressNewContext) CGM.getCXXABI().getMangleContext().startNewFunction(); @@ -1260,6 +1262,8 @@ // Emit the standard function prologue. StartFunction(GD, ResTy, Fn, FnInfo, Args, Loc, BodyRange.getBegin()); + if (Body) + HandleCodeTransformations(Body); // Generate the body of the function. 
PGO.assignRegionCounters(GD, CurFn); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4443,7 +4443,10 @@ maybeSetTrivialComdat(*D, *Fn); - CodeGenFunction(*this).GenerateCode(D, Fn, FI); + { + CodeGenFunction CGF(*this, false, D); + CGF.GenerateCode(D, Fn, FI); + } setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-distribute.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-distribute.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-distribute.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_distribute(int *List, int Length) { +#pragma clang transform distribute + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[DISTRIBUTE_ENABLE:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_ENABLE]] = !{!"llvm.loop.distribute.enable", i1 true} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-factor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-factor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-factor.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_interleave_factor(int *List, int Length) { +#pragma clang transform interleave factor(2) + for (int i = 0; 
i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[INTERLEAVE_ENABLE:[0-9]+]], ![[VECTORIZE_DISABLE:[0-9]+]], ![[INTERLEAVE_COUNT:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[INTERLEAVE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_DISABLE]] = !{!"llvm.loop.vectorize.width", i32 1} +// CHECK-DAG: ![[INTERLEAVE_COUNT]] = !{!"llvm.loop.interleave.count", i32 2} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-successor.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_interleave_successor(int *List, int Length) { +#pragma clang transform unroll +#pragma clang transform interleave + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[INTERLEAVE_ENABLE:[0-9]+]], ![[VECTORIZE_DISABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[VECTORIZE_FOLLOWUP_VECTORIZED:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[INTERLEAVE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_DISABLE]] = !{!"llvm.loop.vectorize.width", i32 1} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = 
!{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_VECTORIZED]] = !{!"llvm.loop.vectorize.followup_vectorized", ![[LOOP_VECTORIZED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_VECTORIZED]] = distinct !{![[LOOP_VECTORIZED]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_interleave(int *List, int Length) { +#pragma clang transform interleave + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[INTERLEAVE_ENABLE:[0-9]+]], ![[VECTORIZE_DISABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[INTERLEAVE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_DISABLE]] = !{!"llvm.loop.vectorize.width", i32 1} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: 
![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-full.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-full.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-full.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll_full(int *List, int Length) { +#pragma clang transform unroll full + for (int i = 0; i < 4; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FULL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-partial.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-partial.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-partial.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll_partial(int *List, int Length) { +#pragma clang transform unroll partial(8) + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FACTOR:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: 
![[UNROLL_FACTOR]] = !{!"llvm.loop.unroll.count", i32 8} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-successor.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll_successor(int *List, int Length) { +#pragma clang transform distribute +#pragma clang transform unroll + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[UNROLL_FOLLOWUP_UNROLLED:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_UNROLLED]] = !{!"llvm.loop.unroll.followup_unrolled", ![[LOOP_UNROLLED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLED]] = distinct !{![[LOOP_UNROLLED]], ![[DISTRIBUTE_ENABLE:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_ENABLE]] = !{!"llvm.loop.distribute.enable", i1 true} +// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_ALL]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_DISTRIBUTE_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[LOOP_DISTRIBUTE_ALL]] = 
distinct !{![[LOOP_DISTRIBUTE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll(int *List, int Length) { +#pragma clang transform unroll + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-partial.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-partial.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-partial.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam(int *List, int Length) { +#pragma clang transform unrollandjam partial(4) + for (int i = 0; i < Length; i++) { + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: 
br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_COUNT:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED]]} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: ![[UNROLLANDJAM_COUNT]] = !{!"llvm.loop.unroll_and_jam.count", i32 4} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll_and_jam.followup_all", ![[LOOP_UNROLLANDJAM_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_ALL]] = distinct !{![[LOOP_UNROLLANDJAM_ALL]], ![[DISABLE_NONFORCED]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-predecessor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-predecessor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-predecessor.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam_predecessor(int *List, int Length) { +#pragma clang transform unrollandjam + for (int i = 0; i < Length; i++) { + #pragma clang transform unroll + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_INNER_UNROLL_ALL:[0-9]+]]} 
+// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_INNER_UNROLL_ALL]] = distinct !{![[LOOP_INNER_UNROLL_ALL]], ![[DISABLE_NONFORCED]]} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED]]} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll_and_jam.followup_all", ![[LOOP_UNROLLANDJAM_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_ALL]] = distinct !{![[LOOP_UNROLLANDJAM_ALL]], ![[DISABLE_NONFORCED]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-successor.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam_successor(int *List, int Length) { +#pragma clang transform distribute +#pragma clang transform unrollandjam + for (int i = 0; i < Length; i++) { + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_ALL:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_OUTER:[0-9]+]], ![[DISABLE_NONFORCED]]} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_ALL]] = 
!{!"llvm.loop.unroll_and_jam.followup_all", ![[LOOP_UNROLLANDJAM_ALL:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_OUTER]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_UNROLLANDJAM_OUTER:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_ALL]] = distinct !{![[LOOP_UNROLLANDJAM_ALL]], ![[DISABLE_NONFORCED]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_OUTER]] = distinct !{![[LOOP_UNROLLANDJAM_OUTER]], ![[DISTRIBUTE_ENABLE:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_ENABLE]] = !{!"llvm.loop.distribute.enable", i1 true} +// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_ALL]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_DISTRIBUTE_FOLLOWUP_ALL]] = distinct !{![[LOOP_DISTRIBUTE_FOLLOWUP_ALL]], ![[DISABLE_NONFORCED]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam(int *List, int Length) { +#pragma clang transform unrollandjam + for (int i = 0; i < Length; i++) { + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED]]} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: 
![[UNROLLANDJAM_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll_and_jam.followup_all", ![[LOOP_UNROLLANDJAM_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_ALL]] = distinct !{![[LOOP_UNROLLANDJAM_ALL]], ![[DISABLE_NONFORCED]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-successor.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +void pragma_transform_vectorize_successor(int *List, int Length) { +#pragma clang transform unroll +#pragma clang transform vectorize + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[VECTORIZE_ENABLE:[0-9]+]], ![[INTERLEAVE_DISABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[VECTORIZE_FOLLOWUP_VECTORIZED:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[INTERLEAVE_DISABLE]] = !{!"llvm.loop.interleave.count", i32 1} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_VECTORIZED]] = !{!"llvm.loop.vectorize.followup_vectorized", ![[LOOP_VECTORIZED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_VECTORIZED]] = distinct !{![[LOOP_VECTORIZED]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = 
!{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-width.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-width.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-width.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_vectorize(int *List, int Length) { +#pragma clang transform vectorize width(4) + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[VECTORIZE_ENABLE:[0-9]+]], ![[INTERLEAVE_DISABLE:[0-9]+]], ![[VECTORIZE_WIDTH:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[INTERLEAVE_DISABLE]] = !{!"llvm.loop.interleave.count", i32 1} +// CHECK-DAG: ![[VECTORIZE_WIDTH]] = !{!"llvm.loop.vectorize.width", i32 4} + +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" 
void pragma_transform_vectorize(int *List, int Length) { +#pragma clang transform vectorize + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[VECTORIZE_ENABLE:[0-9]+]], ![[INTERLEAVE_DISABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[INTERLEAVE_DISABLE]] = !{!"llvm.loop.interleave.count", i32 1} + +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]}