diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -932,7 +932,7 @@ 8); // Using the computed layout, generate the actual block function. bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); - CodeGenFunction BlockCGF{CGM, true}; + CodeGenFunction BlockCGF{CGM, true, ParentFn}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); @@ -1587,6 +1587,7 @@ StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args, blockDecl->getLocation(), blockInfo.getBlockExpr()->getBody()->getBeginLoc()); + HandleCodeTransformations(blockDecl->getBody()); // Okay. Undo some of what StartFunction did. diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -211,7 +211,8 @@ setFunctionLinkage(GD, Fn); - CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo); + CodeGenFunction(*this, false, cast(GD.getDecl())) + .GenerateCode(GD, Fn, FnInfo); setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(cast(GD.getDecl()), Fn); return Fn; diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -1882,6 +1882,7 @@ StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc()); + LoopStack.initAsOutlined(ParentCGF.LoopStack); CurSEHParent = ParentCGF.CurSEHParent; CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn); diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -14,174 +14,52 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H #define LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H -#include "llvm/ADT/ArrayRef.h" +#include 
"llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Compiler.h" namespace llvm { class BasicBlock; class Instruction; class MDNode; +class LLVMContext; } // end namespace llvm namespace clang { -class Attr; class ASTContext; -namespace CodeGen { - -/// Attributes that may be specified on loops. -struct LoopAttributes { - explicit LoopAttributes(bool IsParallel = false); - void clear(); - - /// Generate llvm.loop.parallel metadata for loads and stores. - bool IsParallel; - - /// State of loop vectorization or unrolling. - enum LVEnableState { Unspecified, Enable, Disable, Full }; - - /// Value for llvm.loop.vectorize.enable metadata. - LVEnableState VectorizeEnable; - - /// Value for llvm.loop.unroll.* metadata (enable, disable, or full). - LVEnableState UnrollEnable; - - /// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full). - LVEnableState UnrollAndJamEnable; - - /// Value for llvm.loop.vectorize.predicate metadata - LVEnableState VectorizePredicateEnable; - - /// Value for llvm.loop.vectorize.width metadata. - unsigned VectorizeWidth; - - /// Value for llvm.loop.interleave.count metadata. - unsigned InterleaveCount; - - /// llvm.unroll. - unsigned UnrollCount; - - /// llvm.unroll. - unsigned UnrollAndJamCount; +class Stmt; +class Transform; - /// Value for llvm.loop.distribute.enable metadata. - LVEnableState DistributeEnable; - - /// Value for llvm.loop.pipeline.disable metadata. - bool PipelineDisabled; - - /// Value for llvm.loop.pipeline.iicount metadata. - unsigned PipelineInitiationInterval; -}; +namespace CodeGen { +class CGTransformedTree; +class CGDebugInfo; /// Information used when generating a structured loop. class LoopInfo { public: /// Construct a new LoopInfo for the loop with entry Header. 
- LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, - LoopInfo *Parent); + LoopInfo(llvm::BasicBlock *Header, CGTransformedTree *TreeNode); /// Get the loop id metadata for this loop. - llvm::MDNode *getLoopID() const { return TempLoopID.get(); } + llvm::MDNode *getLoopID() const { return LoopMD; } /// Get the header block of this loop. llvm::BasicBlock *getHeader() const { return Header; } - /// Get the set of attributes active for this loop. - const LoopAttributes &getAttributes() const { return Attrs; } /// Return this loop's access group or nullptr if it does not have one. llvm::MDNode *getAccessGroup() const { return AccGroup; } - /// Create the loop's metadata. Must be called after its nested loops have - /// been processed. - void finish(); private: - /// Loop ID metadata. - llvm::TempMDTuple TempLoopID; + /// The metadata node containing this loop's properties. It is assigned to the + /// terminators of all loop latches. + llvm::MDNode *LoopMD = nullptr; + /// Header block of this loop. llvm::BasicBlock *Header; - /// The attributes for this loop. - LoopAttributes Attrs; - /// The access group for memory accesses parallel to this loop. - llvm::MDNode *AccGroup = nullptr; - /// Start location of this loop. - llvm::DebugLoc StartLoc; - /// End location of this loop. - llvm::DebugLoc EndLoc; - /// The next outer loop, or nullptr if this is the outermost loop. - LoopInfo *Parent; - /// If this loop has unroll-and-jam metadata, this can be set by the inner - /// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner - /// metadata. - llvm::MDNode *UnrollAndJamInnerFollowup = nullptr; - /// Create a LoopID without any transformations. - llvm::MDNode * - createLoopPropertiesMetadata(llvm::ArrayRef LoopProperties); - - /// Create a LoopID for transformations. - /// - /// The methods call each other in case multiple transformations are applied - /// to a loop. 
The transformation first to be applied will use LoopID of the - /// next transformation in its followup attribute. - /// - /// @param Attrs The loop's transformations. - /// @param LoopProperties Non-transformation properties such as debug - /// location, parallel accesses and disabled - /// transformations. These are added to the returned - /// LoopID. - /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes - /// at least one transformation. - /// - /// @return A LoopID (metadata node) that can be used for the llvm.loop - /// annotation or followup-attribute. - /// @{ - llvm::MDNode * - createPipeliningMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef LoopProperties, - bool &HasUserTransforms); - llvm::MDNode * - createPartialUnrollMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef LoopProperties, - bool &HasUserTransforms); - llvm::MDNode * - createUnrollAndJamMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef LoopProperties, - bool &HasUserTransforms); - llvm::MDNode * - createLoopVectorizeMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef LoopProperties, - bool &HasUserTransforms); - llvm::MDNode * - createLoopDistributeMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef LoopProperties, - bool &HasUserTransforms); - llvm::MDNode * - createFullUnrollMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef LoopProperties, - bool &HasUserTransforms); - /// @} - - /// Create a LoopID for this loop, including transformation-unspecific - /// metadata such as debug location. - /// - /// @param Attrs This loop's attributes and transformations. - /// @param LoopProperties Additional non-transformation properties to add - /// to the LoopID, such as transformation-specific - /// metadata that are not covered by @p Attrs. - /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes - /// at least one transformation. - /// - /// @return A LoopID (metadata node) that can be used for the llvm.loop - /// annotation. 
- llvm::MDNode *createMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef LoopProperties, - bool &HasUserTransforms); + /// The metadata node to be assigned to all memory accesses within the loop. + llvm::MDNode *AccGroup = nullptr; }; /// A stack of loop information corresponding to loop nesting levels. @@ -193,93 +71,39 @@ public: LoopInfoStack() {} + ~LoopInfoStack(); + + void initAsOutlined(LoopInfoStack &ParentLIS) { + StmtToTree = ParentLIS.StmtToTree; + } - /// Begin a new structured loop. The set of staged attributes will be - /// applied to the loop and then cleared. - void push(llvm::BasicBlock *Header, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc); + void initBuild(ASTContext &ASTCtx, llvm::LLVMContext &LLVMCtx, + CGDebugInfo *DbgInfo, Stmt *Body); - /// Begin a new structured loop. Stage attributes from the Attrs list. - /// The staged attributes are applied to the loop and then cleared. - void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, - llvm::ArrayRef Attrs, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc); + /// Begin a new structured loop. + void push(llvm::BasicBlock *Header, const Stmt *LoopStmt); /// End the current loop. void pop(); - /// Return the top loop id metadata. - llvm::MDNode *getCurLoopID() const { return getInfo().getLoopID(); } - - /// Return true if the top loop is parallel. - bool getCurLoopParallel() const { - return hasInfo() ? getInfo().getAttributes().IsParallel : false; - } - /// Function called by the CodeGenFunction when an instruction is /// created. void InsertHelper(llvm::Instruction *I) const; - /// Set the next pushed loop as parallel. - void setParallel(bool Enable = true) { StagedAttrs.IsParallel = Enable; } - - /// Set the next pushed loop 'vectorize.enable' - void setVectorizeEnable(bool Enable = true) { - StagedAttrs.VectorizeEnable = - Enable ? LoopAttributes::Enable : LoopAttributes::Disable; - } - - /// Set the next pushed loop as a distribution candidate. 
- void setDistributeState(bool Enable = true) { - StagedAttrs.DistributeEnable = - Enable ? LoopAttributes::Enable : LoopAttributes::Disable; - } - - /// Set the next pushed loop unroll state. - void setUnrollState(const LoopAttributes::LVEnableState &State) { - StagedAttrs.UnrollEnable = State; - } - - /// Set the next pushed vectorize predicate state. - void setVectorizePredicateState(const LoopAttributes::LVEnableState &State) { - StagedAttrs.VectorizePredicateEnable = State; - } - - /// Set the next pushed loop unroll_and_jam state. - void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) { - StagedAttrs.UnrollAndJamEnable = State; - } - - /// Set the vectorize width for the next loop pushed. - void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; } - - /// Set the interleave count for the next loop pushed. - void setInterleaveCount(unsigned C) { StagedAttrs.InterleaveCount = C; } - - /// Set the unroll count for the next loop pushed. - void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; } - - /// \brief Set the unroll count for the next loop pushed. - void setUnrollAndJamCount(unsigned C) { StagedAttrs.UnrollAndJamCount = C; } - - /// Set the pipeline disabled state. - void setPipelineDisabled(bool S) { StagedAttrs.PipelineDisabled = S; } - - /// Set the pipeline initiation interval. - void setPipelineInitiationInterval(unsigned C) { - StagedAttrs.PipelineInitiationInterval = C; - } - private: /// Returns true if there is LoopInfo on the stack. bool hasInfo() const { return !Active.empty(); } /// Return the LoopInfo for the current loop. HasInfo should be called /// first to ensure LoopInfo is present. const LoopInfo &getInfo() const { return *Active.back(); } - /// The set of attributes that will be applied to the next pushed loop. - LoopAttributes StagedAttrs; /// Stack of active loops. 
llvm::SmallVector, 4> Active; + + llvm::SmallVector AllNodes; + llvm::SmallVector AllTransforms; + llvm::DenseMap StmtToTree; + + CGTransformedTree *TransformedStructure = nullptr; }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -7,757 +7,44 @@ //===----------------------------------------------------------------------===// #include "CGLoopInfo.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/Attr.h" +#include "CGTransform.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" + using namespace clang::CodeGen; using namespace llvm; -MDNode * -LoopInfo::createLoopPropertiesMetadata(ArrayRef LoopProperties) { - LLVMContext &Ctx = Header->getContext(); - SmallVector NewLoopProperties; - TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); - NewLoopProperties.push_back(TempNode.get()); - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - - MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties); - LoopID->replaceOperandWith(0, LoopID); - return LoopID; -} - -MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, - ArrayRef LoopProperties, - bool &HasUserTransforms) { - LLVMContext &Ctx = Header->getContext(); - - Optional Enabled; - if (Attrs.PipelineDisabled) - Enabled = false; - else if (Attrs.PipelineInitiationInterval != 0) - Enabled = true; - - if (Enabled != true) { - SmallVector NewLoopProperties; - if (Enabled == false) { - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - NewLoopProperties.push_back( - MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.disable"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt1Ty(Ctx), 1))})); - LoopProperties = NewLoopProperties; - } - return 
createLoopPropertiesMetadata(LoopProperties); +LoopInfo::LoopInfo(llvm::BasicBlock *Header, CGTransformedTree *TreeNode) + : Header(Header) { + if (TreeNode) { + LoopMD = TreeNode->makeLoopID(Header->getContext(), false); + AccGroup = TreeNode->getAccessGroupOrNull(); } - - SmallVector Args; - TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); - Args.push_back(TempNode.get()); - Args.append(LoopProperties.begin(), LoopProperties.end()); - - if (Attrs.PipelineInitiationInterval > 0) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - // No follow-up: This is the last transformation. - - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); - HasUserTransforms = true; - return LoopID; } -MDNode * -LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, - ArrayRef LoopProperties, - bool &HasUserTransforms) { - LLVMContext &Ctx = Header->getContext(); - - Optional Enabled; - if (Attrs.UnrollEnable == LoopAttributes::Disable) - Enabled = false; - else if (Attrs.UnrollEnable == LoopAttributes::Full) - Enabled = None; - else if (Attrs.UnrollEnable != LoopAttributes::Unspecified || - Attrs.UnrollCount != 0) - Enabled = true; - - if (Enabled != true) { - // createFullUnrollMetadata will already have added llvm.loop.unroll.disable - // if unrolling is disabled. - return createPipeliningMetadata(Attrs, LoopProperties, HasUserTransforms); - } - - SmallVector FollowupLoopProperties; - - // Apply all loop properties to the unrolled loop. - FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - - // Don't unroll an already unrolled loop. 
- FollowupLoopProperties.push_back( - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); - - bool FollowupHasTransforms = false; - MDNode *Followup = createPipeliningMetadata(Attrs, FollowupLoopProperties, - FollowupHasTransforms); - - SmallVector Args; - TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); - Args.push_back(TempNode.get()); - Args.append(LoopProperties.begin(), LoopProperties.end()); - - // Setting unroll.count - if (Attrs.UnrollCount > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.count"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt32Ty(Ctx), Attrs.UnrollCount))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - // Setting unroll.full or unroll.disable - if (Attrs.UnrollEnable == LoopAttributes::Enable) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.enable")}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll.followup_all"), Followup})); - - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); - HasUserTransforms = true; - return LoopID; +LoopInfoStack::~LoopInfoStack() { + for (auto N : AllNodes) + delete N; + for (auto T : AllTransforms) + delete T; } -MDNode * -LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, - ArrayRef LoopProperties, - bool &HasUserTransforms) { - LLVMContext &Ctx = Header->getContext(); - - Optional Enabled; - if (Attrs.UnrollAndJamEnable == LoopAttributes::Disable) - Enabled = false; - else if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable || - Attrs.UnrollAndJamCount != 0) - Enabled = true; - - if (Enabled != true) { - SmallVector NewLoopProperties; - if (Enabled == false) { - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - NewLoopProperties.push_back(MDNode::get( - Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); - LoopProperties = NewLoopProperties; - 
} - return createPartialUnrollMetadata(Attrs, LoopProperties, - HasUserTransforms); - } - - SmallVector FollowupLoopProperties; - FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - FollowupLoopProperties.push_back( - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); - - bool FollowupHasTransforms = false; - MDNode *Followup = createPartialUnrollMetadata(Attrs, FollowupLoopProperties, - FollowupHasTransforms); - - SmallVector Args; - TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); - Args.push_back(TempNode.get()); - Args.append(LoopProperties.begin(), LoopProperties.end()); - - // Setting unroll_and_jam.count - if (Attrs.UnrollAndJamCount > 0) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), - ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), - Attrs.UnrollAndJamCount))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.enable")}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_outer"), - Followup})); - - if (UnrollAndJamInnerFollowup) - Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_inner"), - UnrollAndJamInnerFollowup})); - - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); - HasUserTransforms = true; - return LoopID; +void LoopInfoStack::initBuild(clang::ASTContext &ASTCtx, + llvm::LLVMContext &LLVMCtx, CGDebugInfo *DbgInfo, + clang::Stmt *Body) { + CGTransformedTreeBuilder Builder(ASTCtx, LLVMCtx, AllNodes, AllTransforms, + DbgInfo); + TransformedStructure = Builder.computeTransformedStructure(Body, StmtToTree); } -MDNode * -LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, - ArrayRef LoopProperties, - bool 
&HasUserTransforms) { - LLVMContext &Ctx = Header->getContext(); - - Optional Enabled; - if (Attrs.VectorizeEnable == LoopAttributes::Disable) - Enabled = false; - else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || - Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified || - Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) - Enabled = true; - - if (Enabled != true) { - SmallVector NewLoopProperties; - if (Enabled == false) { - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - NewLoopProperties.push_back( - MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt1Ty(Ctx), 0))})); - LoopProperties = NewLoopProperties; - } - return createUnrollAndJamMetadata(Attrs, LoopProperties, HasUserTransforms); - } - - // Apply all loop properties to the vectorized loop. - SmallVector FollowupLoopProperties; - FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - - // Don't vectorize an already vectorized loop. 
- FollowupLoopProperties.push_back( - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); - - bool FollowupHasTransforms = false; - MDNode *Followup = createUnrollAndJamMetadata(Attrs, FollowupLoopProperties, - FollowupHasTransforms); - - SmallVector Args; - TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); - Args.push_back(TempNode.get()); - Args.append(LoopProperties.begin(), LoopProperties.end()); - - // Setting vectorize.predicate - bool IsVectorPredicateEnabled = false; - if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified && - Attrs.VectorizeEnable != LoopAttributes::Disable && - Attrs.VectorizeWidth < 1) { - - IsVectorPredicateEnabled = - (Attrs.VectorizePredicateEnable == LoopAttributes::Enable); - - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.vectorize.predicate.enable"), - ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt1Ty(Ctx), - IsVectorPredicateEnabled))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - // Setting vectorize.width - if (Attrs.VectorizeWidth > 0) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.vectorize.width"), - ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), - Attrs.VectorizeWidth))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - // Setting interleave.count - if (Attrs.InterleaveCount > 0) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.interleave.count"), - ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), - Attrs.InterleaveCount))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - // Setting vectorize.enable - if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || - IsVectorPredicateEnabled) { - Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.vectorize.enable"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt1Ty(Ctx), - IsVectorPredicateEnabled - ? 
true - : (Attrs.VectorizeEnable == LoopAttributes::Enable)))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } - - if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, - {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup})); - - MDNode *LoopID = MDNode::get(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); - HasUserTransforms = true; - return LoopID; -} - -MDNode * -LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, - ArrayRef LoopProperties, - bool &HasUserTransforms) { - LLVMContext &Ctx = Header->getContext(); - - Optional Enabled; - if (Attrs.DistributeEnable == LoopAttributes::Disable) - Enabled = false; - if (Attrs.DistributeEnable == LoopAttributes::Enable) - Enabled = true; - - if (Enabled != true) { - SmallVector NewLoopProperties; - if (Enabled == false) { - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - NewLoopProperties.push_back( - MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.distribute.enable"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt1Ty(Ctx), 0))})); - LoopProperties = NewLoopProperties; - } - return createLoopVectorizeMetadata(Attrs, LoopProperties, - HasUserTransforms); - } - - bool FollowupHasTransforms = false; - MDNode *Followup = - createLoopVectorizeMetadata(Attrs, LoopProperties, FollowupHasTransforms); - - SmallVector Args; - TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); - Args.push_back(TempNode.get()); - Args.append(LoopProperties.begin(), LoopProperties.end()); - - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), - ConstantAsMetadata::get(ConstantInt::get( - llvm::Type::getInt1Ty(Ctx), - (Attrs.DistributeEnable == LoopAttributes::Enable)))}; - Args.push_back(MDNode::get(Ctx, Vals)); - - if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, - {MDString::get(Ctx, "llvm.loop.distribute.followup_all"), Followup})); - - MDNode *LoopID = MDNode::get(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); - 
HasUserTransforms = true; - return LoopID; -} - -MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, - ArrayRef LoopProperties, - bool &HasUserTransforms) { - LLVMContext &Ctx = Header->getContext(); - - Optional Enabled; - if (Attrs.UnrollEnable == LoopAttributes::Disable) - Enabled = false; - else if (Attrs.UnrollEnable == LoopAttributes::Full) - Enabled = true; - - if (Enabled != true) { - SmallVector NewLoopProperties; - if (Enabled == false) { - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - NewLoopProperties.push_back( - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); - LoopProperties = NewLoopProperties; - } - return createLoopDistributeMetadata(Attrs, LoopProperties, - HasUserTransforms); - } - - SmallVector Args; - TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); - Args.push_back(TempNode.get()); - Args.append(LoopProperties.begin(), LoopProperties.end()); - Args.push_back(MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))); - - // No follow-up: there is no loop after full unrolling. - // TODO: Warn if there are transformations after full unrolling. - - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); - HasUserTransforms = true; - return LoopID; -} - -MDNode *LoopInfo::createMetadata( - const LoopAttributes &Attrs, - llvm::ArrayRef AdditionalLoopProperties, - bool &HasUserTransforms) { - SmallVector LoopProperties; - - // If we have a valid start debug location for the loop, add it. - if (StartLoc) { - LoopProperties.push_back(StartLoc.getAsMDNode()); - - // If we also have a valid end debug location for the loop, add it. 
- if (EndLoc) - LoopProperties.push_back(EndLoc.getAsMDNode()); - } - - assert(!!AccGroup == Attrs.IsParallel && - "There must be an access group iff the loop is parallel"); - if (Attrs.IsParallel) { - LLVMContext &Ctx = Header->getContext(); - LoopProperties.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); - } - - LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), - AdditionalLoopProperties.end()); - return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms); -} - -LoopAttributes::LoopAttributes(bool IsParallel) - : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), - UnrollEnable(LoopAttributes::Unspecified), - UnrollAndJamEnable(LoopAttributes::Unspecified), - VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), - InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), - DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), - PipelineInitiationInterval(0) {} - -void LoopAttributes::clear() { - IsParallel = false; - VectorizeWidth = 0; - InterleaveCount = 0; - UnrollCount = 0; - UnrollAndJamCount = 0; - VectorizeEnable = LoopAttributes::Unspecified; - UnrollEnable = LoopAttributes::Unspecified; - UnrollAndJamEnable = LoopAttributes::Unspecified; - VectorizePredicateEnable = LoopAttributes::Unspecified; - DistributeEnable = LoopAttributes::Unspecified; - PipelineDisabled = false; - PipelineInitiationInterval = 0; -} - -LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, - LoopInfo *Parent) - : Header(Header), Attrs(Attrs), StartLoc(StartLoc), EndLoc(EndLoc), - Parent(Parent) { - - if (Attrs.IsParallel) { - // Create an access group for this loop. 
- LLVMContext &Ctx = Header->getContext(); - AccGroup = MDNode::getDistinct(Ctx, {}); - } - - if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && - Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && - Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && - Attrs.PipelineInitiationInterval == 0 && - Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && - Attrs.VectorizeEnable == LoopAttributes::Unspecified && - Attrs.UnrollEnable == LoopAttributes::Unspecified && - Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && - Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc) - return; - - TempLoopID = MDNode::getTemporary(Header->getContext(), None); -} - -void LoopInfo::finish() { - // We did not annotate the loop body instructions because there are no - // attributes for this loop. - if (!TempLoopID) - return; - - MDNode *LoopID; - LoopAttributes CurLoopAttr = Attrs; - LLVMContext &Ctx = Header->getContext(); - - if (Parent && (Parent->Attrs.UnrollAndJamEnable || - Parent->Attrs.UnrollAndJamCount != 0)) { - // Parent unroll-and-jams this loop. - // Split the transformations in those that happens before the unroll-and-jam - // and those after. 
- - LoopAttributes BeforeJam, AfterJam; - - BeforeJam.IsParallel = AfterJam.IsParallel = Attrs.IsParallel; - - BeforeJam.VectorizeWidth = Attrs.VectorizeWidth; - BeforeJam.InterleaveCount = Attrs.InterleaveCount; - BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; - BeforeJam.DistributeEnable = Attrs.DistributeEnable; - BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; - - switch (Attrs.UnrollEnable) { - case LoopAttributes::Unspecified: - case LoopAttributes::Disable: - BeforeJam.UnrollEnable = Attrs.UnrollEnable; - AfterJam.UnrollEnable = Attrs.UnrollEnable; - break; - case LoopAttributes::Full: - BeforeJam.UnrollEnable = LoopAttributes::Full; - break; - case LoopAttributes::Enable: - AfterJam.UnrollEnable = LoopAttributes::Enable; - break; - } - - AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; - AfterJam.UnrollCount = Attrs.UnrollCount; - AfterJam.PipelineDisabled = Attrs.PipelineDisabled; - AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; - - // If this loop is subject of an unroll-and-jam by the parent loop, and has - // an unroll-and-jam annotation itself, we have to decide whether to first - // apply the parent's unroll-and-jam or this loop's unroll-and-jam. The - // UnrollAndJam pass processes loops from inner to outer, so we apply the - // inner first. - BeforeJam.UnrollAndJamCount = Attrs.UnrollAndJamCount; - BeforeJam.UnrollAndJamEnable = Attrs.UnrollAndJamEnable; - - // Set the inner followup metadata to process by the outer loop. Only - // consider the first inner loop. - if (!Parent->UnrollAndJamInnerFollowup) { - // Splitting the attributes into a BeforeJam and an AfterJam part will - // stop 'llvm.loop.isvectorized' (generated by vectorization in BeforeJam) - // to be forwarded to the AfterJam part. We detect the situation here and - // add it manually. 
- SmallVector BeforeLoopProperties; - if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || - BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified || - BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) - BeforeLoopProperties.push_back( - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); - - bool InnerFollowupHasTransform = false; - MDNode *InnerFollowup = createMetadata(AfterJam, BeforeLoopProperties, - InnerFollowupHasTransform); - if (InnerFollowupHasTransform) - Parent->UnrollAndJamInnerFollowup = InnerFollowup; - } - - CurLoopAttr = BeforeJam; - } - - bool HasUserTransforms = false; - LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms); - TempLoopID->replaceAllUsesWith(LoopID); -} - -void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc) { - Active.emplace_back( - new LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, - Active.empty() ? nullptr : Active.back().get())); - // Clear the attributes so nested loops do not inherit them. - StagedAttrs.clear(); -} - -void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, - ArrayRef Attrs, - const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc) { - - // Identify loop hint attributes from Attrs. - for (const auto *Attr : Attrs) { - const LoopHintAttr *LH = dyn_cast(Attr); - const OpenCLUnrollHintAttr *OpenCLHint = - dyn_cast(Attr); - - // Skip non loop hint attributes - if (!LH && !OpenCLHint) { - continue; - } - - LoopHintAttr::OptionType Option = LoopHintAttr::Unroll; - LoopHintAttr::LoopHintState State = LoopHintAttr::Disable; - unsigned ValueInt = 1; - // Translate opencl_unroll_hint attribute argument to - // equivalent LoopHintAttr enums. - // OpenCL v2.0 s6.11.5: - // 0 - enable unroll (no argument). - // 1 - disable unroll. - // other positive integer n - unroll by n. 
- if (OpenCLHint) { - ValueInt = OpenCLHint->getUnrollHint(); - if (ValueInt == 0) { - State = LoopHintAttr::Enable; - } else if (ValueInt != 1) { - Option = LoopHintAttr::UnrollCount; - State = LoopHintAttr::Numeric; - } - } else if (LH) { - auto *ValueExpr = LH->getValue(); - if (ValueExpr) { - llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); - ValueInt = ValueAPS.getSExtValue(); - } - - Option = LH->getOption(); - State = LH->getState(); - } - switch (State) { - case LoopHintAttr::Disable: - switch (Option) { - case LoopHintAttr::Vectorize: - // Disable vectorization by specifying a width of 1. - setVectorizeWidth(1); - break; - case LoopHintAttr::Interleave: - // Disable interleaving by speciyfing a count of 1. - setInterleaveCount(1); - break; - case LoopHintAttr::Unroll: - setUnrollState(LoopAttributes::Disable); - break; - case LoopHintAttr::UnrollAndJam: - setUnrollAndJamState(LoopAttributes::Disable); - break; - case LoopHintAttr::VectorizePredicate: - setVectorizePredicateState(LoopAttributes::Disable); - break; - case LoopHintAttr::Distribute: - setDistributeState(false); - break; - case LoopHintAttr::PipelineDisabled: - setPipelineDisabled(true); - break; - case LoopHintAttr::UnrollCount: - case LoopHintAttr::UnrollAndJamCount: - case LoopHintAttr::VectorizeWidth: - case LoopHintAttr::InterleaveCount: - case LoopHintAttr::PipelineInitiationInterval: - llvm_unreachable("Options cannot be disabled."); - break; - } - break; - case LoopHintAttr::Enable: - switch (Option) { - case LoopHintAttr::Vectorize: - case LoopHintAttr::Interleave: - setVectorizeEnable(true); - break; - case LoopHintAttr::Unroll: - setUnrollState(LoopAttributes::Enable); - break; - case LoopHintAttr::UnrollAndJam: - setUnrollAndJamState(LoopAttributes::Enable); - break; - case LoopHintAttr::VectorizePredicate: - setVectorizePredicateState(LoopAttributes::Enable); - break; - case LoopHintAttr::Distribute: - setDistributeState(true); - break; - case 
LoopHintAttr::UnrollCount: - case LoopHintAttr::UnrollAndJamCount: - case LoopHintAttr::VectorizeWidth: - case LoopHintAttr::InterleaveCount: - case LoopHintAttr::PipelineDisabled: - case LoopHintAttr::PipelineInitiationInterval: - llvm_unreachable("Options cannot enabled."); - break; - } - break; - case LoopHintAttr::AssumeSafety: - switch (Option) { - case LoopHintAttr::Vectorize: - case LoopHintAttr::Interleave: - // Apply "llvm.mem.parallel_loop_access" metadata to load/stores. - setParallel(true); - setVectorizeEnable(true); - break; - case LoopHintAttr::Unroll: - case LoopHintAttr::UnrollAndJam: - case LoopHintAttr::VectorizePredicate: - case LoopHintAttr::UnrollCount: - case LoopHintAttr::UnrollAndJamCount: - case LoopHintAttr::VectorizeWidth: - case LoopHintAttr::InterleaveCount: - case LoopHintAttr::Distribute: - case LoopHintAttr::PipelineDisabled: - case LoopHintAttr::PipelineInitiationInterval: - llvm_unreachable("Options cannot be used to assume mem safety."); - break; - } - break; - case LoopHintAttr::Full: - switch (Option) { - case LoopHintAttr::Unroll: - setUnrollState(LoopAttributes::Full); - break; - case LoopHintAttr::UnrollAndJam: - setUnrollAndJamState(LoopAttributes::Full); - break; - case LoopHintAttr::Vectorize: - case LoopHintAttr::Interleave: - case LoopHintAttr::UnrollCount: - case LoopHintAttr::UnrollAndJamCount: - case LoopHintAttr::VectorizeWidth: - case LoopHintAttr::InterleaveCount: - case LoopHintAttr::Distribute: - case LoopHintAttr::PipelineDisabled: - case LoopHintAttr::PipelineInitiationInterval: - case LoopHintAttr::VectorizePredicate: - llvm_unreachable("Options cannot be used with 'full' hint."); - break; - } - break; - case LoopHintAttr::Numeric: - switch (Option) { - case LoopHintAttr::VectorizeWidth: - setVectorizeWidth(ValueInt); - break; - case LoopHintAttr::InterleaveCount: - setInterleaveCount(ValueInt); - break; - case LoopHintAttr::UnrollCount: - setUnrollCount(ValueInt); - break; - case 
LoopHintAttr::UnrollAndJamCount: - setUnrollAndJamCount(ValueInt); - break; - case LoopHintAttr::PipelineInitiationInterval: - setPipelineInitiationInterval(ValueInt); - break; - case LoopHintAttr::Unroll: - case LoopHintAttr::UnrollAndJam: - case LoopHintAttr::VectorizePredicate: - case LoopHintAttr::Vectorize: - case LoopHintAttr::Interleave: - case LoopHintAttr::Distribute: - case LoopHintAttr::PipelineDisabled: - llvm_unreachable("Options cannot be assigned a value."); - break; - } - break; - } - } - - /// Stage the attributes. - push(Header, StartLoc, EndLoc); +void LoopInfoStack::push(BasicBlock *Header, const clang::Stmt *LoopStmt) { + auto Loop = getAssociatedLoop(LoopStmt); + auto TreeNode = StmtToTree.lookup(Loop); + Active.emplace_back(new LoopInfo(Header, TreeNode)); } void LoopInfoStack::pop() { assert(!Active.empty() && "No active loops to pop"); - Active.back()->finish(); Active.pop_back(); } diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp --- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -464,6 +464,7 @@ II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr, SC_PrivateExtern, false, false); CodeGenFunction NewCGF(CGM); + NewCGF.LoopStack.initAsOutlined(NewCGF.LoopStack); setCGF(&NewCGF); CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -239,7 +239,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: @@ -689,7 +690,8 @@ /// found along the way. /// \param S Starting statement. 
/// \param ParentName Name of the function declaration that is being scanned. - void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName); + void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName, + const FunctionDecl *ParentFn); /// Build type kmp_routine_entry_t (if not built yet). void emitKmpRoutineEntryT(QualType KmpInt32Ty); @@ -848,7 +850,8 @@ /// \param CodeGen Code generation sequence for the \a D directive. virtual llvm::Function *emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emits outlined function for the specified OpenMP teams directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, @@ -860,7 +863,8 @@ /// \param CodeGen Code generation sequence for the \a D directive. virtual llvm::Function *emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emits outlined function for the OpenMP task directive \a D. This /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* @@ -881,7 +885,7 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts); + bool Tied, unsigned &NumberOfParts, const FunctionDecl *ParentFn); /// Cleans up references to the objects in finished function. 
/// @@ -1432,7 +1436,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of @@ -1679,11 +1684,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitParallelOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits outlined function for the specified OpenMP teams directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, @@ -1693,11 +1697,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits outlined function for the OpenMP task directive \a D. 
This /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* @@ -1718,7 +1721,8 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) override; + bool Tied, unsigned &NumberOfParts, + const FunctionDecl *ParentFn) override; /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -2122,7 +2126,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) override; + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1439,10 +1439,11 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, - const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { + const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { assert(ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *"); - CodeGenFunction CGF(CGM, true); + CodeGenFunction CGF(CGM, true, ParentFn); bool HasCancel = false; if (const auto *OPD = dyn_cast(&D)) HasCancel = OPD->hasCancel(); @@ -1468,25 +1469,29 @@ llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); return emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen, + ParentFn); } llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); return emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen, + ParentFn); } llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) { + bool Tied, unsigned &NumberOfParts, const FunctionDecl *ParentFn) { auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, PrePostActionTy &) { llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); @@ -1508,7 +1513,7 @@ : OMPD_task; const CapturedStmt *CS = D.getCapturedStmt(Region); const auto *TD = dyn_cast(&D); - CodeGenFunction CGF(CGM, true); + CodeGenFunction CGF(CGM, true, ParentFn); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, TD ? 
TD->hasCancel() : false, Action); @@ -6461,17 +6466,19 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { assert(!ParentName.empty() && "Invalid target region parent name!"); HasEmittedTargetRegion = true; emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, - IsOffloadEntry, CodeGen); + IsOffloadEntry, CodeGen, ParentFn); } void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { // Create a unique name for the entry function using the source location // information of the current target region. 
The name will be something like: // @@ -6495,7 +6502,7 @@ const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - CodeGenFunction CGF(CGM, true); + CodeGenFunction CGF(CGM, true, ParentFn); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -9437,8 +9444,8 @@ } } -void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, - StringRef ParentName) { +void CGOpenMPRuntime::scanForTargetRegionsFunctions( + const Stmt *S, StringRef ParentName, const FunctionDecl *ParentFn) { if (!S) return; @@ -9464,47 +9471,51 @@ switch (E.getDirectiveKind()) { case OMPD_target: - CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, - cast(E)); + CodeGenFunction::EmitOMPTargetDeviceFunction( + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_parallel: CodeGenFunction::EmitOMPTargetParallelDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_teams: CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_teams_distribute: CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), + ParentFn); break; case OMPD_target_teams_distribute_simd: CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), + ParentFn); break; case OMPD_target_parallel_for: CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case OMPD_target_parallel_for_simd: CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), + ParentFn); break; case OMPD_target_simd: CodeGenFunction::EmitOMPTargetSimdDeviceFunction( - CGM, ParentName, cast(E)); + CGM, ParentName, cast(E), ParentFn); break; case 
OMPD_target_teams_distribute_parallel_for: CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CGM, ParentName, - cast(E)); + cast(E), ParentFn); break; case OMPD_target_teams_distribute_parallel_for_simd: CodeGenFunction:: EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( CGM, ParentName, - cast(E)); + cast(E), + ParentFn); break; case OMPD_parallel: case OMPD_for: @@ -9567,7 +9578,7 @@ return; scanForTargetRegionsFunctions( - E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); + E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName, ParentFn); return; } @@ -9577,7 +9588,7 @@ // Keep looking for target regions recursively. for (const Stmt *II : S->children()) - scanForTargetRegionsFunctions(II, ParentName); + scanForTargetRegionsFunctions(II, ParentName, ParentFn); } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { @@ -9598,7 +9609,7 @@ StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. if (const auto *FD = dyn_cast(VD)) { - scanForTargetRegionsFunctions(FD->getBody(), Name); + scanForTargetRegionsFunctions(FD->getBody(), Name, FD); Optional DevTy = OMPDeclareTargetDeclAttr::getDeviceType(FD); // Do not emit device_type(nohost) functions for the host. 
@@ -9623,12 +9634,12 @@ for (const CXXConstructorDecl *Ctor : RD->ctors()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); - scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); + scanForTargetRegionsFunctions(Ctor->getBody(), ParentName, Ctor); } if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); - scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); + scanForTargetRegionsFunctions(Dtor->getBody(), ParentName, Dtor); } } @@ -11114,13 +11125,15 @@ llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { llvm_unreachable("Not supported in SIMD-only mode"); } llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -11128,7 +11141,7 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) { + bool Tied, unsigned &NumberOfParts, const FunctionDecl *ParentFn) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -11328,7 +11341,8 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { 
llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -110,7 +110,8 @@ void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emit outlined function specialized for the Single Program /// Multiple Data programming model for applicable target directives on the @@ -126,7 +127,8 @@ void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn); /// Emit outlined function for 'target' directive on the NVPTX /// device. @@ -142,7 +144,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) override; + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits code for parallel or serial call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -240,11 +243,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
- llvm::Function * - emitParallelOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits inlined function for the specified OpenMP teams // directive. @@ -255,11 +257,10 @@ /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) override; /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1144,7 +1144,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) { + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); EntryFunctionState EST; WorkerFunctionState WST(CGM, D.getBeginLoc()); @@ -1188,7 +1189,7 @@ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, - IsOffloadEntry, CodeGen); + IsOffloadEntry, 
CodeGen, ParentFn); IsInTTDRegion = false; // Now change the name of the worker function to correspond to this target @@ -1276,7 +1277,8 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, - const RegionCodeGenTy &CodeGen) { + const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { ExecutionRuntimeModesRAII ModeRAII( CurrentExecutionMode, RequiresFullRuntime, CGM.getLangOpts().OpenMPCUDAForceFullRuntime || @@ -1318,7 +1320,7 @@ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, - IsOffloadEntry, CodeGen); + IsOffloadEntry, CodeGen, ParentFn); IsInTTDRegion = false; } @@ -1860,7 +1862,8 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { if (!IsOffloadEntry) // Nothing to do. return; @@ -1869,10 +1872,10 @@ bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D); if (Mode) emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, - CodeGen); + CodeGen, ParentFn); else emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, - CodeGen); + CodeGen, ParentFn); setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } @@ -1954,7 +1957,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { // Emit target region as a standalone region. 
class NVPTXPrePostActionTy : public PrePostActionTy { bool &IsInParallelRegion; @@ -1978,7 +1982,7 @@ IsInTargetMasterThreadRegion = false; auto *OutlinedFun = cast(CGOpenMPRuntime::emitParallelOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen)); + D, ThreadIDVar, InnermostKind, CodeGen, ParentFn)); if (CGM.getLangOpts().Optimize) { OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); @@ -2036,7 +2040,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const FunctionDecl *ParentFn) { SourceLocation Loc = D.getBeginLoc(); const RecordDecl *GlobalizedRD = nullptr; @@ -2099,7 +2104,7 @@ } Action(Loc, GlobalizedRD, MappedDeclsFields); CodeGen.setAction(Action); llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen); + D, ThreadIDVar, InnermostKind, CodeGen, ParentFn); if (CGM.getLangOpts().Optimize) { OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -355,7 +355,7 @@ cast(*S)); break; case Stmt::TransformExecutableDirectiveClass: - llvm_unreachable("not implemented"); + EmitTransformExecutableDirective(cast(*S)); break; } } @@ -720,10 +720,7 @@ JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); EmitBlock(LoopHeader.getBlock()); - const SourceRange &R = S.getSourceRange(); - LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs, - SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + LoopStack.push(LoopHeader.getBlock(), &S); // Create an exit block for when the condition fails, 
which will // also become the break target. @@ -822,10 +819,7 @@ EmitBlock(LoopCond.getBlock()); - const SourceRange &R = S.getSourceRange(); - LoopStack.push(LoopBody, CGM.getContext(), DoAttrs, - SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + LoopStack.push(LoopBody, &S); // C99 6.8.5.2: "The evaluation of the controlling expression takes place // after each execution of the loop body." @@ -880,10 +874,7 @@ llvm::BasicBlock *CondBlock = Continue.getBlock(); EmitBlock(CondBlock); - const SourceRange &R = S.getSourceRange(); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, - SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + LoopStack.push(CondBlock, &S); // If the for loop doesn't have an increment we can just use the // condition as the continue block. Otherwise we'll need to create @@ -981,10 +972,7 @@ llvm::BasicBlock *CondBlock = createBasicBlock("for.cond"); EmitBlock(CondBlock); - const SourceRange &R = S.getSourceRange(); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, - SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + LoopStack.push(CondBlock, &S); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -2430,6 +2418,7 @@ // Generate the function. StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), CD->getBody()->getBeginLoc()); + HandleCodeTransformations(&S); // Set the context parameter in CapturedStmtInfo. Address DeclPtr = GetAddrOfLocalVar(CD->getContextParam()); CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -482,6 +482,7 @@ // Generate the function. 
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); + CGF.HandleCodeTransformations(CD->getBody()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (const FieldDecl *FD : RD->fields()) { @@ -598,7 +599,7 @@ FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, /*RegisterCastedArgsOnly=*/true, CapturedStmtInfo->getHelperName()); - CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); + CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true, ParentFn); WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; Args.clear(); LocalAddrs.clear(); @@ -1275,7 +1276,8 @@ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen, + CGF.getParentFn()); if (const auto *NumThreadsClause = S.getSingleClause()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); llvm::Value *NumThreads = @@ -1429,9 +1431,7 @@ // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); - const SourceRange R = S.getSourceRange(); - LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + LoopStack.push(CondBlock, &S); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. 
@@ -1701,39 +1701,8 @@ } } -static void emitSimdlenSafelenClause(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - bool IsMonotonic) { - if (!CGF.HaveInsertPoint()) - return; - if (const auto *C = D.getSingleClause()) { - RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), - /*ignoreResult=*/true); - auto *Val = cast(Len.getScalarVal()); - CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); - // In presence of finite 'safelen', it may be unsafe to mark all - // the memory instructions parallel, because loop-carried - // dependences of 'safelen' iterations are possible. - if (!IsMonotonic) - CGF.LoopStack.setParallel(!D.getSingleClause()); - } else if (const auto *C = D.getSingleClause()) { - RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), - /*ignoreResult=*/true); - auto *Val = cast(Len.getScalarVal()); - CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); - // In presence of finite 'safelen', it may be unsafe to mark all - // the memory instructions parallel, because loop-carried - // dependences of 'safelen' iterations are possible. - CGF.LoopStack.setParallel(/*Enable=*/false); - } -} - void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic) { - // Walk clauses and process safelen/lastprivate. - LoopStack.setParallel(!IsMonotonic); - LoopStack.setVectorizeEnable(); - emitSimdlenSafelenClause(*this, D, IsMonotonic); } void CodeGenFunction::EmitOMPSimdFinal( @@ -1908,9 +1877,7 @@ // Start the loop with a block that tests the condition. 
llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); - const SourceRange R = S.getSourceRange(); - LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + LoopStack.push(CondBlock, &S); llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { @@ -1951,11 +1918,7 @@ JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); - // Generate !llvm.loop.parallel metadata for loads and stores for loops - // with dynamic/guided scheduling and without ordered clause. - if (!isOpenMPSimdDirective(S.getDirectiveKind())) - LoopStack.setParallel(!IsMonotonic); - else + if (isOpenMPSimdDirective(S.getDirectiveKind())) EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getBeginLoc(); @@ -2305,7 +2268,8 @@ } void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( - CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { + CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S, + const FunctionDecl *ParentFn) { // Emit SPMD target parallel for region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitOMPSimdRegion(CGF, S, Action); @@ -2314,7 +2278,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -3152,7 +3116,7 @@ }; llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, - Data.NumberOfParts); + Data.NumberOfParts, ParentFn); OMPLexicalScope Scope(*this, S, llvm::None, !isOpenMPParallelDirective(S.getDirectiveKind())); TaskGen(*this, OutlinedFn, Data); @@ -3295,7 +3259,7 @@ }; llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, - Data.NumberOfParts); + Data.NumberOfParts, ParentFn); llvm::APInt TrueOrFalse(32, S.hasClausesOfKind() ? 1 : 0); IntegerLiteral IfCond(getContext(), TrueOrFalse, getContext().getIntTypeForBitwidth(32, /*Signed=*/0), @@ -3629,9 +3593,10 @@ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } -static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, - const CapturedStmt *S) { - CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); +static llvm::Function * +emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S, + const FunctionDecl *ParentFn) { + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true, ParentFn); CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; CGF.CapturedStmtInfo = &CapStmtInfo; llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); @@ -3654,7 +3619,8 @@ if (C) { llvm::SmallVector CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + llvm::Function *OutlinedFn = + emitOutlinedOrderedFunction(CGM, CS, ParentFn); CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars); } else { @@ -4199,8 +4165,8 @@ 
CGM.getMangledName(GlobalDecl(cast(CGF.CurFuncDecl))); // Emit target region as a standalone region. - CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, - IsOffloadEntry, CodeGen); + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen, CGF.getParentFn()); OMPLexicalScope Scope(CGF, S, OMPD_task); auto &&SizeEmitter = [IsOffloadEntry](CodeGenFunction &CGF, @@ -4232,9 +4198,9 @@ CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); } -void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, - StringRef ParentName, - const OMPTargetDirective &S) { +void CodeGenFunction::EmitOMPTargetDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S, + const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetRegion(CGF, S, Action); }; @@ -4242,7 +4208,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4260,7 +4226,8 @@ const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen, + CGF.getParentFn()); const auto *NT = S.getSingleClause(); const auto *TL = S.getSingleClause(); @@ -4319,8 +4286,8 @@ } void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( - CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDirective &S) { + CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S, + const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetTeamsRegion(CGF, Action, S); }; @@ -4328,7 +4295,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4366,7 +4333,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeDirective &S) { + const OMPTargetTeamsDistributeDirective &S, const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetTeamsDistributeRegion(CGF, Action, S); }; @@ -4374,7 +4341,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4412,7 +4379,8 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeSimdDirective &S) { + const OMPTargetTeamsDistributeSimdDirective &S, + const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetTeamsDistributeSimdRegion(CGF, Action, S); }; @@ -4420,7 +4388,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4552,7 +4520,8 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForDirective &S) { + const OMPTargetTeamsDistributeParallelForDirective &S, + const FunctionDecl *ParentFn) { // Emit SPMD target teams distribute parallel for region as a standalone // region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -4562,7 +4531,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4604,7 +4573,8 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + const OMPTargetTeamsDistributeParallelForSimdDirective &S, + const FunctionDecl *ParentFn) { // Emit SPMD target teams distribute parallel for simd region as a standalone // region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -4614,7 +4584,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4882,7 +4852,7 @@ void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelDirective &S) { + const OMPTargetParallelDirective &S, const FunctionDecl *ParentFn) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetParallelRegion(CGF, S, Action); }; @@ -4890,7 +4860,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4921,7 +4891,7 @@ void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForDirective &S) { + const OMPTargetParallelForDirective &S, const FunctionDecl *ParentFn) { // Emit SPMD target parallel for region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetParallelForRegion(CGF, S, Action); @@ -4930,7 +4900,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } @@ -4960,7 +4930,7 @@ void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForSimdDirective &S) { + const OMPTargetParallelForSimdDirective &S, const FunctionDecl *ParentFn) { // Emit SPMD target parallel for region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitTargetParallelForSimdRegion(CGF, S, Action); @@ -4969,7 +4939,7 @@ llvm::Constant *Addr; // Emit target region as a standalone region. 
CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen, ParentFn); assert(Fn && Addr && "Target device function emission failed."); } diff --git a/clang/lib/CodeGen/CGTransform.h b/clang/lib/CodeGen/CGTransform.h new file mode 100644 --- /dev/null +++ b/clang/lib/CodeGen/CGTransform.h @@ -0,0 +1,154 @@ +//===---- CGTransform.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Emitting metadata for loop transformations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGTRANSFORM_H +#define LLVM_CLANG_LIB_CODEGEN_CGTRANSFORM_H + +#include "clang/Analysis/TransformedTree.h" +#include "llvm/IR/DebugLoc.h" + +namespace clang { +namespace CodeGen { +class CGDebugInfo; +class CGTransformedTreeBuilder; + +class CGTransformedTree : public TransformedTree { + friend class CGTransformedTreeBuilder; + friend class TransformedTree; + using BaseTy = TransformedTree; + using NodeTy = CGTransformedTree; + + BaseTy &getBase() { return *this; } + const BaseTy &getBase() const { return *this; } + + llvm::DebugLoc BeginLoc; + llvm::DebugLoc EndLoc; + + llvm::MDNode *AccessGroup = nullptr; + llvm::SmallSetVector ParallelAccessGroups; + +public: + CGTransformedTree(llvm::ArrayRef SubLoops, NodeTy *BasedOn, + clang::Stmt *Original, int FollowupRole) + : TransformedTree(SubLoops, BasedOn, Original, FollowupRole) {} + + bool IsDefault = true; + bool DisableHeuristic = false; + + llvm::SmallVector Attributes; + llvm::SmallVector Transforms; + llvm::SmallVector, 4> FollowupAttributes; + + bool 
collectLoopProperties(llvm::SmallVectorImpl &Props); + void addAttribute(bool Inherited, llvm::Metadata *Node); + +public: + void markNondefault() { IsDefault = false; } + void markDisableHeuristic() { DisableHeuristic = true; } + + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::ArrayRef Vals); + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name); + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name, bool Val); + void addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name, int Val); + + llvm::MDNode *getAccessGroupOrNull() { + assert(getOriginal()); + return AccessGroup; + } + void + getOrCreateAccessGroups(llvm::LLVMContext &LLVMCtx, + llvm::SmallVectorImpl &AccessGroups); + llvm::ArrayRef getParallelAccessGroups() const { + return ParallelAccessGroups.getArrayRef(); + } + + llvm::MDNode *makeLoopID(llvm::LLVMContext &Ctx, bool HasAllDisableNonforced); +}; + +class CGTransformedTreeBuilder + : public TransformedTreeBuilder { + using BaseTy = + TransformedTreeBuilder; + using NodeTy = CGTransformedTree; + + BaseTy &getBase() { return *this; } + const BaseTy &getBase() const { return *this; } + + llvm::LLVMContext &LLVMCtx; + CGDebugInfo *DbgInfo; + +public: + CGTransformedTreeBuilder(ASTContext &ASTCtx, llvm::LLVMContext &LLVMCtx, + llvm::SmallVectorImpl &AllNodes, + llvm::SmallVectorImpl &AllTransforms, + CGDebugInfo *DbgInfo) + : TransformedTreeBuilder(ASTCtx, AllNodes, AllTransforms), + LLVMCtx(LLVMCtx), DbgInfo(DbgInfo) {} + + // Ignore any diagnostic and its arguments. 
+ struct DummyDiag { + template DummyDiag operator<<(const T &) const { return {}; } + }; + DummyDiag Diag(SourceLocation Loc, unsigned DiagID) { return {}; } + + void applyOriginal(CGTransformedTree *L); + + void inheritLoopAttributes(CGTransformedTree *Dst, CGTransformedTree *Src, + bool IsAll, bool IsSuccessor); + void markParallel(CGTransformedTree *L); + + void disableUnroll(CGTransformedTree *L) { + L->addAttribute(LLVMCtx, true, "llvm.loop.unroll.disable"); + L->markNondefault(); + } + + void disableUnrollAndJam(CGTransformedTree *L) { + L->addAttribute(LLVMCtx, true, "llvm.loop.unroll_and_jam.disable"); + L->markNondefault(); + } + + void disableDistribution(CGTransformedTree *L) { + L->addAttribute(LLVMCtx, true, "llvm.loop.distribute.enable", false); + L->markNondefault(); + } + + void disableVectorizeInterleave(CGTransformedTree *L) { + L->addAttribute(LLVMCtx, true, "llvm.loop.vectorize.width", 1); + L->markNondefault(); + } + + void disablePipelining(CGTransformedTree *L) { + L->addAttribute(LLVMCtx, true, "llvm.loop.pipeline.disable", true); + L->markNondefault(); + } + + void applyUnroll(LoopUnrollingTransform *Trans, + CGTransformedTree *OriginalLoop); + void applyUnrollAndJam(LoopUnrollAndJamTransform *Trans, + CGTransformedTree *OuterLoop, + CGTransformedTree *InnerLoop); + void applyDistribution(LoopDistributionTransform *Trans, + CGTransformedTree *OriginalLoop); + void applyVectorizeInterleave(LoopVectorizationInterleavingTransform *Trans, + CGTransformedTree *MainLoop); + void applyPipelining(LoopPipeliningTransform *Trans, + CGTransformedTree *MainLoop); +}; + +} // namespace CodeGen +} // namespace clang +#endif /* LLVM_CLANG_LIB_CODEGEN_CGTRANSFORM_H */ diff --git a/clang/lib/CodeGen/CGTransform.cpp b/clang/lib/CodeGen/CGTransform.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/CodeGen/CGTransform.cpp @@ -0,0 +1,396 @@ +//===---- CGTransform.cpp -------------------------------------------------===// +// +// Part of the LLVM Project,
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Emitting metadata for loop transformations. +// +//===----------------------------------------------------------------------===// + +#include "CGTransform.h" +#include "CGDebugInfo.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtTransform.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" + +using namespace clang; +using namespace clang::CodeGen; +using namespace llvm; + +void CodeGenFunction::HandleCodeTransformations(const Stmt *Body) { + if (!getParentFn()) { + // Transformations not supported for e.g. Objective-C + return; + } + + assert(CurFn && "Must be called after StartFunction"); + assert(Body); + + LoopStack.initBuild(getContext(), getLLVMContext(), DebugInfo, + getParentFn()->getBody()); +} + +void CodeGenFunction::EmitTransformExecutableDirective( + const TransformExecutableDirective &D) { + EmitStmt(D.getAssociated()); +} + +bool CGTransformedTree::collectLoopProperties( + llvm::SmallVectorImpl &Props) { + for (Metadata *M : this->Attributes) + Props.push_back(M); + for (Metadata *M : this->Transforms) + Props.push_back(M); + return !Props.empty(); +} + +void CGTransformedTree::addAttribute(bool Inherited, llvm::Metadata *Node) { + if (Inherited) + Attributes.push_back(Node); + else + Transforms.push_back(Node); +} + +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::ArrayRef Vals) { + addAttribute(Inherited, MDNode::get(LLVMCtx, Vals)); +} + +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name) { + addAttribute(LLVMCtx, Inherited, {MDString::get(LLVMCtx, Name)}); +} + +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + 
llvm::StringRef Name, bool Val) { + addAttribute(LLVMCtx, Inherited, + {MDString::get(LLVMCtx, Name), + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt1Ty(LLVMCtx), Val))}); +} +void CGTransformedTree::addAttribute(llvm::LLVMContext &LLVMCtx, bool Inherited, + llvm::StringRef Name, int Val) { + addAttribute(LLVMCtx, Inherited, + {MDString::get(LLVMCtx, Name), + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt32Ty(LLVMCtx), Val))}); +} + +void CGTransformedTree::getOrCreateAccessGroups( + llvm::LLVMContext &LLVMCtx, + llvm::SmallVectorImpl &AccessGroups) { + if (getOriginal()) { + if (!AccessGroup) + AccessGroup = MDNode::getDistinct(LLVMCtx, {}); + AccessGroups.push_back(AccessGroup); + return; + } + + getBasedOn()->getOrCreateAccessGroups(LLVMCtx, AccessGroups); +} + +llvm::MDNode *CGTransformedTree::makeLoopID(llvm::LLVMContext &Ctx, + bool HasAllDisableNonforced) { + if (IsDefault && (!DisableHeuristic || HasAllDisableNonforced)) + return nullptr; + + SmallVector Args; + + // Reserve operand 0 for loop id self reference. + Args.push_back(nullptr); + + if (BeginLoc) { + Args.push_back(BeginLoc.getAsMDNode()); + + // If we also have a valid end debug location for the loop, add it. 
+ if (EndLoc) + Args.push_back(EndLoc.getAsMDNode()); + } + + if (!ParallelAccessGroups.empty()) { + SmallVector ArgOpts; + ArgOpts.reserve(ParallelAccessGroups.size()); + ArgOpts.push_back(MDString::get(Ctx, "llvm.loop.parallel_accesses")); + ArgOpts.insert(ArgOpts.end(), ParallelAccessGroups.begin(), + ParallelAccessGroups.end()); + Args.push_back(MDNode::get(Ctx, ArgOpts)); + } + + collectLoopProperties(Args); + + bool AllIsDisableHeuristic = false; + bool OtherIsNondefault = false; + for (auto P : FollowupAttributes) { + int Role = P.second->FollowupRole; + if (TransformedBy->isAllRole(Role)) { + if (P.second->DisableHeuristic) + AllIsDisableHeuristic = true; + } else { + if (!P.second->IsDefault) + OtherIsNondefault = true; + } + } + + for (auto P : FollowupAttributes) { + StringRef FollowupName = P.first; + NodeTy *FollowupNode = P.second; + llvm::MDNode *FollowupId; + if (TransformedBy->isAllRole(FollowupNode->FollowupRole)) { + if (OtherIsNondefault) + FollowupNode->markNondefault(); + FollowupId = FollowupNode->makeLoopID(Ctx, false); + } else { + FollowupId = FollowupNode->makeLoopID(Ctx, AllIsDisableHeuristic); + } + if (!FollowupId) + continue; + + Args.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, FollowupName), FollowupId})); + } + + if (DisableHeuristic && !HasAllDisableNonforced) + Args.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.disable_nonforced")})); + + // No need for an MDNode if it is empty. + if (Args.size() <= 1) + return nullptr; + + // Set the first operand to itself. 
+ MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + return LoopID; +} + +void CGTransformedTreeBuilder::applyOriginal(CGTransformedTree *L) { + if (!DbgInfo) + return; + + L->BeginLoc = DbgInfo->SourceLocToDebugLoc(L->getOriginal()->getBeginLoc()); + L->EndLoc = DbgInfo->SourceLocToDebugLoc(L->getOriginal()->getEndLoc()); + if (L->BeginLoc || L->EndLoc) + L->markNondefault(); +} + +void CGTransformedTreeBuilder::inheritLoopAttributes(CGTransformedTree *Dst, + CGTransformedTree *Src, + bool IsAll, + bool IsSuccessor) { + Dst->BeginLoc = Src->BeginLoc; + Dst->EndLoc = Src->EndLoc; + + for (auto A : Src->Attributes) { + // TODO: Check for duplicates? + Dst->Attributes.push_back(A); + } + + // We currently assume that every transformation of a parallel loop also + // results in a parallel loop. + Dst->ParallelAccessGroups.insert(Src->ParallelAccessGroups.begin(), + Src->ParallelAccessGroups.end()); +} + +void CGTransformedTreeBuilder::markParallel(CGTransformedTree *L) { + getBase().markParallel(L); + + // Has it already been marked parallel? + // Avoid redundant metadata if it was.
+ if (!L->ParallelAccessGroups.empty()) + return; + + SmallVector AccGroups; + assert(L->ParallelAccessGroups.empty() && + "Should not have parallel access groups if was empty before"); + + L->getOrCreateAccessGroups(LLVMCtx, AccGroups); + L->ParallelAccessGroups.insert(AccGroups.begin(), AccGroups.end()); +} + +void CGTransformedTreeBuilder::applyUnroll(LoopUnrollingTransform *Trans, + CGTransformedTree *OriginalLoop) { + if (Trans->isExplicitEnable() && !(Trans->isLegacy() && Trans->isFull())) + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll.enable"); + + if (Trans->isFull()) { + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll.full"); + } else { + int UnrollFactor = Trans->getFactor(); + if (UnrollFactor > 0) + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll.count", + UnrollFactor); + } + + for (CGTransformedTree *F : OriginalLoop->Followups) { + switch (F->FollowupRole) { + case LoopUnrollingTransform::FollowupAll: + OriginalLoop->FollowupAttributes.emplace_back(LLVMLoopUnrollFollowupAll, + F); + if (Trans->isLegacy()) + F->addAttribute(LLVMCtx, true, "llvm.loop.unroll.disable"); + else + F->markDisableHeuristic(); + break; + case LoopUnrollingTransform::FollowupUnrolled: + OriginalLoop->FollowupAttributes.emplace_back( + LLVMLoopUnrollFollowupUnrolled, F); + break; + case LoopUnrollingTransform::FollowupRemainder: + OriginalLoop->FollowupAttributes.emplace_back( + LLVMLoopUnrollFollowupRemainder, F); + break; + } + } + + OriginalLoop->markNondefault(); + if (!Trans->isLegacy()) + OriginalLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyUnrollAndJam( + LoopUnrollAndJamTransform *Trans, CGTransformedTree *OuterLoop, + CGTransformedTree *InnerLoop) { + int Factor = Trans->getFactor(); + if (Factor > 0) + OuterLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll_and_jam.count", + Factor); + + if (Trans->isExplicitEnable()) + OuterLoop->addAttribute(LLVMCtx, false, "llvm.loop.unroll_and_jam.enable"); + + 
for (CGTransformedTree *F : OuterLoop->Followups) { + switch (F->FollowupRole) { + case LoopUnrollAndJamTransform::FollowupAll: + OuterLoop->FollowupAttributes.emplace_back( + "llvm.loop.unroll_and_jam.followup_all", F); + if (!Trans->isLegacy()) + F->markDisableHeuristic(); + break; + case LoopUnrollAndJamTransform::FollowupOuter: + OuterLoop->FollowupAttributes.emplace_back( + "llvm.loop.unroll_and_jam.followup_outer", F); + if (!Trans->isLegacy()) + F->markDisableHeuristic(); + if (Trans->isLegacy()) + F->addAttribute(LLVMCtx, true, "llvm.loop.unroll_and_jam.disable"); + break; + } + } + + if (InnerLoop) { + for (CGTransformedTree *F : InnerLoop->Followups) { + switch (F->FollowupRole) { + case LoopUnrollAndJamTransform::FollowupInner: + if (!Trans->isLegacy()) + F->markDisableHeuristic(); + OuterLoop->FollowupAttributes.emplace_back( + "llvm.loop.unroll_and_jam.followup_inner", F); + break; + } + } + } + + OuterLoop->markNondefault(); + if (!Trans->isLegacy()) + OuterLoop->markDisableHeuristic(); + if (InnerLoop && !Trans->isLegacy()) + InnerLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyDistribution( + LoopDistributionTransform *Trans, CGTransformedTree *OriginalLoop) { + OriginalLoop->addAttribute(LLVMCtx, false, "llvm.loop.distribute.enable", + true); + + for (CGTransformedTree *F : OriginalLoop->Followups) { + switch (F->FollowupRole) { + case LoopDistributionTransform::FollowupAll: + OriginalLoop->FollowupAttributes.emplace_back( + "llvm.loop.distribute.followup_all", F); + if (!Trans->isLegacy()) + F->markDisableHeuristic(); + break; + } + } + + OriginalLoop->markNondefault(); + if (!Trans->isLegacy()) + OriginalLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyVectorizeInterleave( + LoopVectorizationInterleavingTransform *Trans, + CGTransformedTree *MainLoop) { + bool DisabledVectorization = + !Trans->isVectorizationEnabled().getValueOr(true) || + Trans->getWidth() == 1; + bool ForcedInterleaving = 
Trans->isInterleavingEnabled().getValueOr(false); + Optional VecEnabled = Trans->isVectorizationEnabled(); + if (ForcedInterleaving) + VecEnabled = true; + + if (Trans->isPredicateEnabled().hasValue() && !DisabledVectorization) { + MainLoop->addAttribute(LLVMCtx, false, + "llvm.loop.vectorize.predicate.enable", + Trans->isPredicateEnabled().getValue()); + if (Trans->isLegacy()) + VecEnabled = true; + } + + if (Trans->getWidth() > 0) { +#if 0 + if (Trans->isLegacy() && !VecEnabled.hasValue()) + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.vectorize.enable", + true); +#endif + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.vectorize.width", + Trans->getWidth()); + } + + if (Trans->getInterleaveCount() > 0) + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.interleave.count", + Trans->getInterleaveCount()); + + if (VecEnabled.hasValue()) + MainLoop->addAttribute(LLVMCtx, false, "llvm.loop.vectorize.enable", + VecEnabled.getValue()); + + for (CGTransformedTree *F : MainLoop->Followups) { + switch (F->FollowupRole) { + case LoopVectorizationInterleavingTransform ::FollowupAll: + if (Trans->isLegacy()) + F->addAttribute(LLVMCtx, true, "llvm.loop.isvectorized"); + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_all", F); + if (!Trans->isLegacy()) + F->markDisableHeuristic(); + break; + case LoopVectorizationInterleavingTransform ::FollowupVectorized: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_vectorized", F); + break; + case LoopVectorizationInterleavingTransform ::FollowupEpilogue: + MainLoop->FollowupAttributes.emplace_back( + "llvm.loop.vectorize.followup_epilogue", F); + break; + } + } + + MainLoop->markNondefault(); + if (!Trans->isLegacy()) + MainLoop->markDisableHeuristic(); +} + +void CGTransformedTreeBuilder::applyPipelining(LoopPipeliningTransform *Trans, + CGTransformedTree *MainLoop) { + if (Trans->getInitiationInterval() > 0) + MainLoop->addAttribute(LLVMCtx, false, + 
"llvm.loop.pipeline.initiationinterval", + Trans->getInitiationInterval()); + + MainLoop->markNondefault(); +} diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -73,6 +73,7 @@ CGRecordLayoutBuilder.cpp CGStmt.cpp CGStmtOpenMP.cpp + CGTransform.cpp CGVTT.cpp CGVTables.cpp CodeGenABITypes.cpp diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -95,6 +95,7 @@ class TargetCodeGenInfo; struct OMPTaskDataTy; struct CGCoroData; +class CGTransformedTree; /// The kind of evaluation to perform on values of a particular /// type. Basically, is the code in CGExprScalar, CGExprComplex, or @@ -1610,7 +1611,8 @@ llvm::Function *Fn); public: - CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); + CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext = false, + const FunctionDecl *ParentFn = nullptr); ~CodeGenFunction(); CodeGenTypes &getTypes() const { return CGM.getTypes(); } @@ -2933,6 +2935,9 @@ llvm::Value *EmitSEHExceptionInfo(); llvm::Value *EmitSEHAbnormalTermination(); + void HandleCodeTransformations(const Stmt *Body); + void EmitTransformExecutableDirective(const TransformExecutableDirective &D); + /// Emit simple code for OpenMP directives in Simd-only mode. void EmitSimpleOMPExecutableDirective(const OMPExecutableDirective &D); @@ -3191,43 +3196,50 @@ /// Emit device code for the target directive. 
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetDirective &S); + const OMPTargetDirective &S, + const FunctionDecl *ParentFn); static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelDirective &S); + const OMPTargetParallelDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target parallel for directive. static void EmitOMPTargetParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForDirective &S); + const OMPTargetParallelForDirective &S, const FunctionDecl *ParentFn); /// Emit device code for the target parallel for simd directive. static void EmitOMPTargetParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetParallelForSimdDirective &S); + const OMPTargetParallelForSimdDirective &S, const FunctionDecl *ParentFn); /// Emit device code for the target teams directive. - static void - EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDirective &S); + static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetTeamsDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target teams distribute directive. static void EmitOMPTargetTeamsDistributeDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeDirective &S); + const OMPTargetTeamsDistributeDirective &S, const FunctionDecl *ParentFn); /// Emit device code for the target teams distribute simd directive. static void EmitOMPTargetTeamsDistributeSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeSimdDirective &S); + const OMPTargetTeamsDistributeSimdDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target simd directive. 
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, - const OMPTargetSimdDirective &S); + const OMPTargetSimdDirective &S, + const FunctionDecl *ParentFn); /// Emit device code for the target teams distribute parallel for simd /// directive. static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForSimdDirective &S); + const OMPTargetTeamsDistributeParallelForSimdDirective &S, + const FunctionDecl *ParentFn); static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, - const OMPTargetTeamsDistributeParallelForDirective &S); + const OMPTargetTeamsDistributeParallelForDirective &S, + const FunctionDecl *ParentFn); /// Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. @@ -4369,6 +4381,11 @@ llvm::Value *EmitX86CpuSupports(uint64_t Mask); llvm::Value *EmitX86CpuInit(); llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); + + const FunctionDecl *ParentFn = nullptr; + +public: + const FunctionDecl *getParentFn() const { return ParentFn; } }; inline DominatingLLVMValue::saved_type diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -12,9 +12,9 @@ #include "CodeGenFunction.h" #include "CGBlocks.h" -#include "CGCleanup.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" +#include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGOpenMPRuntime.h" #include "CodeGenModule.h" @@ -26,6 +26,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtOpenMP.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetInfo.h" @@ -37,6 +38,7 @@ #include "llvm/IR/MDBuilder.h" 
#include "llvm/IR/Operator.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" + using namespace clang; using namespace CodeGen; @@ -57,13 +59,15 @@ return CGOpts.OptimizationLevel != 0; } -CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) +CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext, + const FunctionDecl *ParentFn) : CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()), Builder(cgm, cgm.getModule().getContext(), llvm::ConstantFolder(), CGBuilderInserterTy(this)), SanOpts(CGM.getLangOpts().Sanitize), DebugInfo(CGM.getModuleDebugInfo()), PGO(cgm), ShouldEmitLifetimeMarkers(shouldEmitLifetimeMarkers( - CGM.getCodeGenOpts(), CGM.getLangOpts())) { + CGM.getCodeGenOpts(), CGM.getLangOpts())), + ParentFn(ParentFn) { if (!suppressNewContext) CGM.getCXXABI().getMangleContext().startNewFunction(); @@ -1195,6 +1199,8 @@ // Emit the standard function prologue. StartFunction(GD, ResTy, Fn, FnInfo, Args, Loc, BodyRange.getBegin()); + if (Body) + HandleCodeTransformations(Body); // Generate the body of the function. 
PGO.assignRegionCounters(GD, CurFn); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4420,7 +4420,10 @@ maybeSetTrivialComdat(*D, *Fn); - CodeGenFunction(*this).GenerateCode(D, Fn, FI); + { + CodeGenFunction CGF(*this, false, D); + CGF.GenerateCode(D, Fn, FI); + } setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); diff --git a/clang/test/CodeGenCXX/pragma-followup_inner.cpp b/clang/test/CodeGenCXX/pragma-followup_inner.cpp --- a/clang/test/CodeGenCXX/pragma-followup_inner.cpp +++ b/clang/test/CodeGenCXX/pragma-followup_inner.cpp @@ -20,12 +20,12 @@ // CHECK-DAG: ![[ACCESSGROUP_2:[0-9]+]] = distinct !{} -// CHECK-DAG: ![[INNERLOOP_3:[0-9]+]] = distinct !{![[INNERLOOP_3:[0-9]+]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]} -// CHECK-DAG: ![[PARALLEL_ACCESSES_4:[0-9]+]] = !{!"llvm.loop.parallel_accesses", !2} +// CHECK-DAG: ![[INNERLOOP_3:[0-9]+]] = distinct !{![[INNERLOOP_3:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]} // CHECK-DAG: ![[DISTRIBUTE_5:[0-9]+]] = !{!"llvm.loop.distribute.enable", i1 true} // CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]} // CHECK-DAG: ![[LOOP_7:[0-9]+]] = distinct !{![[LOOP_7:[0-9]+]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]]} +// CHECK-DAG: ![[PARALLEL_ACCESSES_4:[0-9]+]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]} // CHECK-DAG: ![[VECTORIZE_8:[0-9]+]] = !{!"llvm.loop.vectorize.enable", i1 true} // CHECK-DAG: ![[OUTERLOOP_9:[0-9]+]] = distinct !{![[OUTERLOOP_9:[0-9]+]], ![[UNROLLANDJAM_COUNT_10:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPINNER_11:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-followup_outer.cpp b/clang/test/CodeGenCXX/pragma-followup_outer.cpp --- a/clang/test/CodeGenCXX/pragma-followup_outer.cpp +++ 
b/clang/test/CodeGenCXX/pragma-followup_outer.cpp @@ -17,12 +17,12 @@ // CHECK-DAG: ![[ACCESSGROUP_2:[0-9]+]] = distinct !{} -// CHECK-DAG: ![[LOOP_3:[0-9]+]] = distinct !{![[LOOP_3:[0-9]+]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]} -// CHECK-DAG: ![[PARALLEL_ACCESSES_4:[0-9]+]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]} +// CHECK-DAG: ![[LOOP_3:[0-9]+]] = distinct !{![[LOOP_3:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]} // CHECK-DAG: ![[DISTRIBUTE_5:[0-9]+]] = !{!"llvm.loop.distribute.enable", i1 true} // CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]} // CHECK-DAG: ![[LOOP_7:[0-9]+]] = distinct !{![[LOOP_7:[0-9]+]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]], ![[VECTORIZE_FOLLOWUP_9:[0-9]+]]} +// CHECK-DAG: ![[PARALLEL_ACCESSES_4:[0-9]+]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]} // CHECK-DAG: ![[VECTORIZE_8:[0-9]+]] = !{!"llvm.loop.vectorize.enable", i1 true} // CHECK-DAG: ![[VECTORIZE_FOLLOWUP_9:[0-9]+]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_10:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-distribute.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-distribute.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-distribute.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_distribute(int *List, int Length) { +#pragma clang transform distribute + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[DISTRIBUTE_ENABLE:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_ENABLE]] = 
!{!"llvm.loop.distribute.enable", i1 true} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-factor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-factor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-factor.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_interleave_factor(int *List, int Length) { +#pragma clang transform interleave factor(2) + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[VECTORIZE_DISABLE:[0-9]+]], ![[INTERLEAVE_COUNT:[0-9]+]], ![[INTERLEAVE_ENABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_DISABLE]] = !{!"llvm.loop.vectorize.width", i32 1} +// CHECK-DAG: ![[INTERLEAVE_COUNT]] = !{!"llvm.loop.interleave.count", i32 2} +// CHECK-DAG: ![[INTERLEAVE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave-successor.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + 
+extern "C" void pragma_transform_interleave_successor(int *List, int Length) { +#pragma clang transform unroll +#pragma clang transform interleave + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[VECTORIZE_DISABLE:[0-9]+]], ![[INTERLEAVE_ENABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[VECTORIZE_FOLLOWUP_VECTORIZED:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_DISABLE]] = !{!"llvm.loop.vectorize.width", i32 1} +// CHECK-DAG: ![[INTERLEAVE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_VECTORIZED]] = !{!"llvm.loop.vectorize.followup_vectorized", ![[LOOP_VECTORIZED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_VECTORIZED]] = distinct !{![[LOOP_VECTORIZED]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLLED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLED]] = distinct !{![[LOOP_UNROLLED]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-interleave.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_interleave(int *List, int Length) { +#pragma clang transform 
interleave + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[VECTORIZE_DISABLE:[0-9]+]], ![[INTERLEAVE_ENABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_DISABLE]] = !{!"llvm.loop.vectorize.width", i32 1} +// CHECK-DAG: ![[INTERLEAVE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-full.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-full.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-full.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll_full(int *List, int Length) { +#pragma clang transform unroll full + for (int i = 0; i < 4; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FULL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-partial.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-partial.cpp new file mode 100644 --- /dev/null +++ 
b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-partial.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll_partial(int *List, int Length) { +#pragma clang transform unroll partial(8) + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FACTOR:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FACTOR]] = !{!"llvm.loop.unroll.count", i32 8} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll-successor.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll_successor(int *List, int Length) { +#pragma clang transform distribute +#pragma clang transform unroll + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[UNROLL_FOLLOWUP_UNROLLED:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = 
!{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_UNROLLED]] = !{!"llvm.loop.unroll.followup_unrolled", ![[LOOP_UNROLLED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLED]] = distinct !{![[LOOP_UNROLLED]], ![[DISTRIBUTE_ENABLE:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_ENABLE]] = !{!"llvm.loop.distribute.enable", i1 true} +// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_ALL]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_DISTRIBUTE_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[LOOP_DISTRIBUTE_ALL]] = distinct !{![[LOOP_DISTRIBUTE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unroll.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unroll(int *List, int Length) { +#pragma clang transform unroll + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLL_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_UNROLL_ALL]] = distinct !{![[LOOP_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} 
diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-partial.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-partial.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-partial.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam(int *List, int Length) { +#pragma clang transform unrollandjam partial(4) + for (int i = 0; i < Length; i++) { + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_COUNT:[0-9]+]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_OUTER:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_INNER:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_COUNT]] = !{!"llvm.loop.unroll_and_jam.count", i32 4} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_OUTER]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_UNROLLANDJAM_OUTER:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_INNER]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[LOOP_UNROLLANDJAM_INNER:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_OUTER]] = distinct !{![[LOOP_UNROLLANDJAM_OUTER]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_INNER]] = distinct !{![[LOOP_UNROLLANDJAM_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-predecessor.cpp 
b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-predecessor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-predecessor.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam_predecessor(int *List, int Length) { +#pragma clang transform unrollandjam + for (int i = 0; i < Length; i++) { + #pragma clang transform unroll + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_INNER_UNROLL_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_INNER_UNROLL_ALL]] = distinct !{![[LOOP_INNER_UNROLL_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_OUTER:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_INNER:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_OUTER]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_UNROLLANDJAM_OUTER:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_INNER]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[LOOP_UNROLLANDJAM_INNER:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_OUTER]] = distinct !{![[LOOP_UNROLLANDJAM_OUTER]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_INNER]] = 
distinct !{![[LOOP_UNROLLANDJAM_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam-successor.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam_successor(int *List, int Length) { +#pragma clang transform distribute +#pragma clang transform unrollandjam + for (int i = 0; i < Length; i++) { + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_OUTER:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_INNER:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_OUTER]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_UNROLLANDJAM_OUTER:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_INNER]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[LOOP_UNROLLANDJAM_INNER:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_INNER]] = distinct !{![[LOOP_UNROLLANDJAM_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_OUTER]] = distinct !{![[LOOP_UNROLLANDJAM_OUTER]], ![[DISTRIBUTE_ENABLE:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_ENABLE]] = 
!{!"llvm.loop.distribute.enable", i1 true} +// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_ALL]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_DISTRIBUTE_FOLLOWUP_ALL:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_DISTRIBUTE_FOLLOWUP_ALL]] = distinct !{![[LOOP_DISTRIBUTE_FOLLOWUP_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-unrollandjam.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_unrollandjam(int *List, int Length) { +#pragma clang transform unrollandjam + for (int i = 0; i < Length; i++) { + for (int j = 0; j < Length; j++) { + List[j] += i * 2; +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_INNER:[0-9]+]] + } +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + } +} + + +// CHECK-DAG: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[UNROLLANDJAM_ENABLE:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_OUTER:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUP_INNER:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_ENABLE]] = !{!"llvm.loop.unroll_and_jam.enable"} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_OUTER]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_UNROLLANDJAM_OUTER:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUP_INNER]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[LOOP_UNROLLANDJAM_INNER:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_OUTER]] = distinct !{![[LOOP_UNROLLANDJAM_OUTER]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLANDJAM_INNER]] = distinct !{![[LOOP_UNROLLANDJAM_INNER]], 
![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-successor.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-successor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-successor.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +void pragma_transform_vectorize_successor(int *List, int Length) { +#pragma clang transform unroll +#pragma clang transform vectorize + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[INTERLEAVE_DISABLE:[0-9]+]], ![[VECTORIZE_ENABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[VECTORIZE_FOLLOWUP_VECTORIZED:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[INTERLEAVE_DISABLE]] = !{!"llvm.loop.interleave.count", i32 1} +// CHECK-DAG: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_VECTORIZED]] = !{!"llvm.loop.vectorize.followup_vectorized", ![[LOOP_VECTORIZED:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_VECTORIZED]] = distinct !{![[LOOP_VECTORIZED]], ![[UNROLL_ENABLE:[0-9]+]], ![[UNROLL_FOLLOWUP_ALL:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_ALL]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_UNROLLED:[0-9]+]]} + +// CHECK-DAG: ![[LOOP_UNROLLED]] = distinct !{![[LOOP_UNROLLED]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git 
a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-width.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-width.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize-width.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_vectorize(int *List, int Length) { +#pragma clang transform vectorize width(4) + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop ![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[VECTORIZE_WIDTH:[0-9]+]], ![[INTERLEAVE_DISABLE:[0-9]+]], ![[VECTORIZE_ENABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_WIDTH]] = !{!"llvm.loop.vectorize.width", i32 4} +// CHECK-DAG: ![[INTERLEAVE_DISABLE]] = !{!"llvm.loop.interleave.count", i32 1} +// CHECK-DAG: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]} diff --git a/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize.cpp b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/pragma-transform/pragma-transform-vectorize.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++11 -fexperimental-transform-pragma -emit-llvm -o - %s | FileCheck %s + +extern "C" void pragma_transform_vectorize(int *List, int Length) { +#pragma clang transform vectorize + for (int i = 0; i < Length; i++) { +// CHECK: br label %{{.*}}, !llvm.loop 
![[LOOP:[0-9]+]] + List[i] = i * 2; + } +} + + +// CHECK-DAG: ![[LOOP]] = distinct !{![[LOOP]], ![[INTERLEAVE_DISABLE:[0-9]+]], ![[VECTORIZE_ENABLE:[0-9]+]], ![[VECTORIZE_FOLLOWUP_ALL:[0-9]+]], ![[DISABLE_NONFORCED:[0-9]+]]} +// CHECK-DAG: ![[INTERLEAVE_DISABLE]] = !{!"llvm.loop.interleave.count", i32 1} +// CHECK-DAG: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_ALL]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_VECTORIZE_ALL:[0-9]+]]} +// CHECK-DAG: ![[DISABLE_NONFORCED]] = !{!"llvm.loop.disable_nonforced"} + +// CHECK-DAG: ![[LOOP_VECTORIZE_ALL]] = distinct !{![[LOOP_VECTORIZE_ALL]], ![[DISABLE_NONFORCED:[0-9]+]]}