Index: cfe/trunk/lib/CodeGen/CGBuilder.h =================================================================== --- cfe/trunk/lib/CodeGen/CGBuilder.h +++ cfe/trunk/lib/CodeGen/CGBuilder.h @@ -15,12 +15,39 @@ namespace clang { namespace CodeGen { +class CodeGenFunction; + +/// \brief This is an IRBuilder insertion helper that forwards to +/// CodeGenFunction::InsertHelper, which adds necessary metadata to +/// instructions. +template +class CGBuilderInserter + : protected llvm::IRBuilderDefaultInserter { +public: + CGBuilderInserter() : CGF(nullptr) {} + explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {} + +protected: + /// \brief This forwards to CodeGenFunction::InsertHelper. + void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, + llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const; +private: + void operator=(const CGBuilderInserter &) LLVM_DELETED_FUNCTION; + + CodeGenFunction *CGF; +}; + // Don't preserve names on values in an optimized build. #ifdef NDEBUG -typedef llvm::IRBuilder CGBuilderTy; +#define PreserveNames false #else -typedef llvm::IRBuilder<> CGBuilderTy; +#define PreserveNames true #endif +typedef CGBuilderInserter CGBuilderInserterTy; +typedef llvm::IRBuilder CGBuilderTy; +#undef PreserveNames } // end namespace CodeGen } // end namespace clang Index: cfe/trunk/lib/CodeGen/CGLoopInfo.h =================================================================== --- cfe/trunk/lib/CodeGen/CGLoopInfo.h +++ cfe/trunk/lib/CodeGen/CGLoopInfo.h @@ -0,0 +1,136 @@ +//===---- CGLoopInfo.h - LLVM CodeGen for loop metadata -*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the internal state used for llvm translation for loop statement +// metadata.
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CODEGEN_CGLOOPINFO_H +#define CLANG_CODEGEN_CGLOOPINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class BasicBlock; +class Instruction; +class MDNode; +} // end namespace llvm + +namespace clang { +namespace CodeGen { + +/// \brief Attributes that may be specified on loops. +struct LoopAttributes { + explicit LoopAttributes(bool IsParallel = false); + void clear(); + + /// \brief Generate llvm.mem.parallel_loop_access metadata for memory accesses. + bool IsParallel; + + /// \brief Values of llvm.vectorizer.enable metadata. + enum LVEnableState { VecUnspecified, VecEnable, VecDisable }; + + /// \brief llvm.vectorizer.enable + LVEnableState VectorizerEnable; + + /// \brief llvm.vectorizer.width + unsigned VectorizerWidth; + + /// \brief llvm.vectorizer.unroll + unsigned VectorizerUnroll; +}; + +/// \brief Information used when generating a structured loop. +class LoopInfo { +public: + /// \brief Construct a new LoopInfo for the loop with entry Header. + LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs); + + /// \brief Get the loop id metadata for this loop. + llvm::MDNode *getLoopID() const { return LoopID; } + + /// \brief Get the header block of this loop. + llvm::BasicBlock *getHeader() const { return Header; } + + /// \brief Get the set of attributes active for this loop. + const LoopAttributes &getAttributes() const { return Attrs; } + +private: + /// \brief Loop ID metadata. + llvm::MDNode *LoopID; + /// \brief Header block of this loop. + llvm::BasicBlock *Header; + /// \brief The attributes for this loop. + LoopAttributes Attrs; +}; + +/// \brief A stack of loop information corresponding to loop nesting levels. +/// This stack can be used to prepare attributes which are applied when a loop +/// is emitted.
+class LoopInfoStack { + LoopInfoStack(const LoopInfoStack &) LLVM_DELETED_FUNCTION; + void operator=(const LoopInfoStack &) LLVM_DELETED_FUNCTION; + +public: + LoopInfoStack() {} + + /// \brief Begin a new structured loop. The set of staged attributes will be + /// applied to the loop and then cleared. + void push(llvm::BasicBlock *Header); + + /// \brief End the current loop. + void pop(); + + /// \brief Return the top loop id metadata; a loop must be active. + llvm::MDNode *getCurLoopID() const { return getInfo().getLoopID(); } + + /// \brief Return true if the top loop is parallel. + bool getCurLoopParallel() const { + return hasInfo() ? getInfo().getAttributes().IsParallel : false; + } + + /// \brief Function called by the CodeGenFunction when an instruction is + /// created. + void InsertHelper(llvm::Instruction *I) const; + + /// \brief Set the next pushed loop as parallel. + void setParallel(bool Enable = true) { StagedAttrs.IsParallel = Enable; } + + /// \brief Set the next pushed loop 'vectorizer.enable' + void setVectorizerEnable(bool Enable = true) { + StagedAttrs.VectorizerEnable = + Enable ? LoopAttributes::VecEnable : LoopAttributes::VecDisable; + } + + /// \brief Set the vectorizer width for the next loop pushed. + void setVectorizerWidth(unsigned W) { StagedAttrs.VectorizerWidth = W; } + + /// \brief Set the vectorizer unroll for the next loop pushed. + void setVectorizerUnroll(unsigned U) { StagedAttrs.VectorizerUnroll = U; } + +private: + /// \brief Returns true if there is LoopInfo on the stack. + bool hasInfo() const { return !Active.empty(); } + /// \brief Return the LoopInfo for the current loop. hasInfo should be called + /// first to ensure LoopInfo is present. + const LoopInfo &getInfo() const { return Active.back(); } + /// \brief The set of attributes that will be applied to the next pushed loop. + LoopAttributes StagedAttrs; + /// \brief Stack of active loops.
+ llvm::SmallVector Active; +}; + +} // end namespace CodeGen +} // end namespace clang + +#endif // CLANG_CODEGEN_CGLOOPINFO_H Index: cfe/trunk/lib/CodeGen/CGLoopInfo.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGLoopInfo.cpp +++ cfe/trunk/lib/CodeGen/CGLoopInfo.cpp @@ -0,0 +1,112 @@ +//===---- CGLoopInfo.cpp - LLVM CodeGen for loop metadata -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "CGLoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +using namespace clang; +using namespace CodeGen; +using namespace llvm; + +static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { + + if (!Attrs.IsParallel && Attrs.VectorizerWidth == 0 && + Attrs.VectorizerUnroll == 0 && + Attrs.VectorizerEnable == LoopAttributes::VecUnspecified) + return nullptr; + + SmallVector Args; + // Reserve operand 0 for loop id self reference. 
+ MDNode *TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode); + + // Setting vectorizer.width + if (Attrs.VectorizerWidth > 0) { + Value *Vals[] = { MDString::get(Ctx, "llvm.vectorizer.width"), + ConstantInt::get(Type::getInt32Ty(Ctx), + Attrs.VectorizerWidth) }; + Args.push_back(MDNode::get(Ctx, Vals)); + } + + // Setting vectorizer.unroll + if (Attrs.VectorizerUnroll > 0) { + Value *Vals[] = { MDString::get(Ctx, "llvm.vectorizer.unroll"), + ConstantInt::get(Type::getInt32Ty(Ctx), + Attrs.VectorizerUnroll) }; + Args.push_back(MDNode::get(Ctx, Vals)); + } + + // Setting vectorizer.enable + if (Attrs.VectorizerEnable != LoopAttributes::VecUnspecified) { + Value *Vals[] = { MDString::get(Ctx, "llvm.vectorizer.enable"), + ConstantInt::get(Type::getInt1Ty(Ctx), + (Attrs.VectorizerEnable == + LoopAttributes::VecEnable)) }; + Args.push_back(MDNode::get(Ctx, Vals)); + } + + MDNode *LoopID = MDNode::get(Ctx, Args); + assert(LoopID->use_empty() && "LoopID should not be used"); + + // Set the first operand to itself. + LoopID->replaceOperandWith(0, LoopID); + MDNode::deleteTemporary(TempNode); + return LoopID; +} + +LoopAttributes::LoopAttributes(bool IsParallel) + : IsParallel(IsParallel), VectorizerEnable(LoopAttributes::VecUnspecified), + VectorizerWidth(0), VectorizerUnroll(0) {} + +void LoopAttributes::clear() { + IsParallel = false; + VectorizerWidth = 0; + VectorizerUnroll = 0; + VectorizerEnable = LoopAttributes::VecUnspecified; +} + +LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs) + : LoopID(nullptr), Header(Header), Attrs(Attrs) { + LoopID = createMetadata(Header->getContext(), Attrs); +} + +void LoopInfoStack::push(BasicBlock *Header) { + Active.push_back(LoopInfo(Header, StagedAttrs)); + // Clear the attributes so nested loops do not inherit them. 
+ StagedAttrs.clear(); +} + +void LoopInfoStack::pop() { + assert(!Active.empty() && "No active loops to pop"); + Active.pop_back(); +} + +void LoopInfoStack::InsertHelper(Instruction *I) const { + if (!hasInfo()) + return; + + const LoopInfo &L = getInfo(); + if (!L.getLoopID()) + return; + + if (TerminatorInst *TI = dyn_cast(I)) { + for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i) + if (TI->getSuccessor(i) == L.getHeader()) { + TI->setMetadata("llvm.loop", L.getLoopID()); + break; + } + return; + } + + if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory()) + I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID()); +} Index: cfe/trunk/lib/CodeGen/CGStmt.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGStmt.cpp +++ cfe/trunk/lib/CodeGen/CGStmt.cpp @@ -76,7 +76,6 @@ case Stmt::SEHExceptStmtClass: case Stmt::SEHFinallyStmtClass: case Stmt::MSDependentExistsStmtClass: - case Stmt::OMPSimdDirectiveClass: llvm_unreachable("invalid statement class to emit generically"); case Stmt::NullStmtClass: case Stmt::CompoundStmtClass: @@ -176,6 +175,9 @@ case Stmt::OMPParallelDirectiveClass: EmitOMPParallelDirective(cast(*S)); break; + case Stmt::OMPSimdDirectiveClass: + EmitOMPSimdDirective(cast(*S)); + break; } } @@ -510,6 +512,8 @@ JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); EmitBlock(LoopHeader.getBlock()); + LoopStack.push(LoopHeader.getBlock()); + // Create an exit block for when the condition fails, which will // also become the break target. JumpDest LoopExit = getJumpDestInCurrentScope("while.end"); @@ -573,6 +577,8 @@ // Branch to the loop header again. EmitBranch(LoopHeader.getBlock()); + LoopStack.pop(); + // Emit the exit block. EmitBlock(LoopExit.getBlock(), true); @@ -593,6 +599,9 @@ // Emit the body of the loop. 
llvm::BasicBlock *LoopBody = createBasicBlock("do.body"); + + LoopStack.push(LoopBody); + EmitBlockWithFallThrough(LoopBody, Cnt); { RunCleanupsScope BodyScope(*this); @@ -623,6 +632,8 @@ Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(), PGO.createLoopWeights(S.getCond(), Cnt)); + LoopStack.pop(); + // Emit the exit block. EmitBlock(LoopExit.getBlock()); @@ -654,6 +665,8 @@ llvm::BasicBlock *CondBlock = Continue.getBlock(); EmitBlock(CondBlock); + LoopStack.push(CondBlock); + // If the for loop doesn't have an increment we can just use the // condition as the continue block. Otherwise we'll need to create // a block for it (in the current scope, i.e. in the scope of the @@ -724,6 +737,8 @@ if (DI) DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); + LoopStack.pop(); + // Emit the fall-through block. EmitBlock(LoopExit.getBlock(), true); } @@ -749,6 +764,8 @@ llvm::BasicBlock *CondBlock = createBasicBlock("for.cond"); EmitBlock(CondBlock); + LoopStack.push(CondBlock); + // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); @@ -798,6 +815,8 @@ if (DI) DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); + LoopStack.pop(); + // Emit the fall-through block. 
EmitBlock(LoopExit.getBlock(), true); } Index: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp +++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp @@ -49,3 +49,30 @@ CGOpenMPRuntime::OMPRTL__kmpc_fork_call); EmitRuntimeCall(RTLFn, Args); } + +void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + const CapturedStmt *CS = cast(S.getAssociatedStmt()); + const Stmt *Body = CS->getCapturedStmt(); + LoopStack.setParallel(); + LoopStack.setVectorizerEnable(true); + for (auto C : S.clauses()) { + switch (C->getClauseKind()) { + case OMPC_safelen: { + RValue Len = EmitAnyExpr(cast(C)->getSafelen(), + AggValueSlot::ignored(), true); + llvm::ConstantInt *Val = cast(Len.getScalarVal()); + LoopStack.setVectorizerWidth(Val->getZExtValue()); + // In presence of finite 'safelen', it may be unsafe to mark all + // the memory instructions parallel, because loop-carried + // dependences of 'safelen' iterations are possible. + LoopStack.setParallel(false); + break; + } + default: + // Not handled yet + ; + } + } + EmitStmt(Body); +} + Index: cfe/trunk/lib/CodeGen/CMakeLists.txt =================================================================== --- cfe/trunk/lib/CodeGen/CMakeLists.txt +++ cfe/trunk/lib/CodeGen/CMakeLists.txt @@ -38,6 +38,7 @@ CGExprComplex.cpp CGExprConstant.cpp CGExprScalar.cpp + CGLoopInfo.cpp CGObjC.cpp CGObjCGNU.cpp CGObjCMac.cpp Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenFunction.h +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h @@ -16,6 +16,7 @@ #include "CGBuilder.h" #include "CGDebugInfo.h" +#include "CGLoopInfo.h" #include "CGValue.h" #include "CodeGenModule.h" #include "CodeGenPGO.h" @@ -129,8 +130,15 @@ const TargetInfo &Target; typedef std::pair ComplexPairTy; + LoopInfoStack LoopStack; CGBuilderTy Builder; + /// \brief CGBuilder insert helper. 
This function is called after an + /// instruction is created using Builder. + void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, + llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const; + /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. const Decl *CurFuncDecl; @@ -1883,6 +1891,7 @@ llvm::Value *GenerateCapturedStmtArgument(const CapturedStmt &S); void EmitOMPParallelDirective(const OMPParallelDirective &S); + void EmitOMPSimdDirective(const OMPSimdDirective &S); //===--------------------------------------------------------------------===// // LValue Expression Emission Index: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp +++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp @@ -35,7 +35,8 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) : CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()), - Builder(cgm.getModule().getContext()), CapturedStmtInfo(nullptr), + Builder(cgm.getModule().getContext(), llvm::ConstantFolder(), + CGBuilderInserterTy(this)), CapturedStmtInfo(nullptr), SanitizePerformTypeCheck(CGM.getSanOpts().Null | CGM.getSanOpts().Alignment | CGM.getSanOpts().ObjectSize | @@ -1644,3 +1645,30 @@ } CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() { } + +void CodeGenFunction::InsertHelper(llvm::Instruction *I, + const llvm::Twine &Name, + llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const { + LoopStack.InsertHelper(I); +} + +template +void CGBuilderInserter::InsertHelper( + llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const { + llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, + InsertPt); + if (CGF) + CGF->InsertHelper(I, Name, BB, InsertPt); +} + +#ifdef NDEBUG +#define PreserveNames false +#else +#define PreserveNames true +#endif +template void 
CGBuilderInserter::InsertHelper( + llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const; +#undef PreserveNames Index: cfe/trunk/test/OpenMP/simd_metadata.c =================================================================== --- cfe/trunk/test/OpenMP/simd_metadata.c +++ cfe/trunk/test/OpenMP/simd_metadata.c @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -fopenmp=libiomp5 -emit-llvm %s -o - | FileCheck %s + +void h1(float *c, float *a, float *b, int size) +{ +// CHECK-LABEL: define void @h1 + int t = 0; +#pragma omp simd safelen(16) linear(t) + for (int i = 0; i < size; ++i) { + c[i] = a[i] * a[i] + b[i] * b[t]; + ++t; +// do not emit parallel_loop_access metadata due to usage of safelen clause. +// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}} + } +} + +void h2(float *c, float *a, float *b, int size) +{ +// CHECK-LABEL: define void @h2 + int t = 0; +#pragma omp simd linear(t) + for (int i = 0; i < size; ++i) { + c[i] = a[i] * a[i] + b[i] * b[t]; + ++t; +// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access [[LOOP_H2_HEADER:![0-9]+]] + } +} + +void h3(float *c, float *a, float *b, int size) +{ +// CHECK-LABEL: define void @h3 +#pragma omp simd + for (int i = 0; i < size; ++i) { + for (int j = 0; j < size; ++j) { + c[j*i] = a[i] * b[j]; + } + } +// do not emit parallel_loop_access for nested loop. 
+// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}} +} + +// Metadata for h1: +// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = metadata !{metadata [[LOOP_H1_HEADER]], metadata [[LOOP_WIDTH_16:![0-9]+]], metadata [[LOOP_VEC_ENABLE:![0-9]+]]} +// CHECK: [[LOOP_WIDTH_16]] = metadata !{metadata !"llvm.vectorizer.width", i32 16} +// CHECK: [[LOOP_VEC_ENABLE]] = metadata !{metadata !"llvm.vectorizer.enable", i1 true} +// +// Metadata for h2: +// CHECK: [[LOOP_H2_HEADER]] = metadata !{metadata [[LOOP_H2_HEADER]], metadata [[LOOP_VEC_ENABLE]]} +// +// Metadata for h3: +// CHECK: [[LOOP_H3_HEADER:![0-9]+]] = metadata !{metadata [[LOOP_H3_HEADER]], metadata [[LOOP_VEC_ENABLE]]} +// +