[NVPTX] Emit .pragma "nounroll" for loops marked llvm.loop.unroll.disable

Summary of the changes in this patch:
- AsmPrinter::EmitBasicBlockStart becomes virtual so that targets can
  override it.
- NVPTXAsmPrinter overrides it: after the default basic-block output it emits
  .pragma "nounroll" when the block is a loop header and the terminator of one
  of its same-loop predecessors carries llvm.loop metadata containing
  llvm.loop.unroll.disable. NVPTXAsmPrinter now requires MachineLoopInfo.
- GetUnrollMetadata(const MDNode *LoopID, StringRef Name) moves out of
  LoopUnrollPass.cpp (where it was a static helper taking a Loop) into
  llvm:: in lib/Transforms/Utils/LoopUnroll.cpp and is declared in
  UnrollLoop.h. The moved copy additionally skips hint nodes that have no
  operands instead of reading operand 0 of an empty node.
- Adds the regression test test/CodeGen/NVPTX/nounroll.ll.

Index: include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- include/llvm/CodeGen/AsmPrinter.h
+++ include/llvm/CodeGen/AsmPrinter.h
@@ -238,10 +238,6 @@
   ///
   void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const;
 
-  /// This method prints the label for the specified MachineBasicBlock, an
-  /// alignment (if present) and a comment describing it if appropriate.
-  void EmitBasicBlockStart(const MachineBasicBlock &MBB) const;
-
   /// Lower the specified LLVM Constant to an MCExpr.
   const MCExpr *lowerConstant(const Constant *CV);
 
@@ -271,6 +267,12 @@
   /// function.
   virtual void EmitFunctionBodyEnd() {}
 
+  /// Targets can override this to emit stuff at the start of a basic block.
+  /// By default, this method prints the label for the specified
+  /// MachineBasicBlock, an alignment (if present) and a comment describing it
+  /// if appropriate.
+  virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const;
+
   /// Targets can override this to emit stuff at the end of a basic block.
   virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {}
 
Index: include/llvm/Transforms/Utils/UnrollLoop.h
===================================================================
--- include/llvm/Transforms/Utils/UnrollLoop.h
+++ include/llvm/Transforms/Utils/UnrollLoop.h
@@ -16,12 +16,15 @@
 #ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
 #define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
 
+#include "llvm/ADT/StringRef.h"
+
 namespace llvm {
 
 class AssumptionCache;
 class Loop;
 class LoopInfo;
 class LPPassManager;
+class MDNode;
 class Pass;
 
 bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
@@ -30,6 +33,8 @@
 
 bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
                              LPPassManager* LPM);
+
+const MDNode *GetUnrollMetadata(const MDNode *LoopID, StringRef Name);
 }
 
 #endif
Index: lib/Target/NVPTX/NVPTXAsmPrinter.h
===================================================================
--- lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -187,6 +187,7 @@
   const Function *F;
   std::string CurrentFnName;
 
+  void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
   void EmitFunctionEntryLabel() override;
   void EmitFunctionBodyStart() override;
   void EmitFunctionBodyEnd() override;
@@ -281,6 +282,8 @@
                                MCOperand &MCOp);
   void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
 
+  bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
+
   LineReader *reader;
   LineReader *getReader(std::string);
 
@@ -311,6 +314,11 @@
     delete reader;
   }
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineLoopInfo>();
+    AsmPrinter::getAnalysisUsage(AU);
+  }
+
   bool ignoreLoc(const MachineInstr &);
 
   std::string getVirtualRegisterName(unsigned) const;
Index: lib/Target/NVPTX/NVPTXAsmPrinter.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/DebugInfo.h"
@@ -45,6 +46,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TimeValue.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <sstream>
 using namespace llvm;
 
@@ -418,6 +420,44 @@
   printReturnValStr(F, O);
 }
 
+// Return true if MBB is the header of a loop marked with
+// llvm.loop.unroll.disable.
+// TODO(jingyue): consider "#pragma unroll 1" which is equivalent to "#pragma
+// nounroll".
+bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
+    const MachineBasicBlock &MBB) const {
+  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+  // TODO(jingyue): isLoopHeader() should take "const MachineBasicBlock *".
+  // We insert .pragma "nounroll" only to the loop header.
+  if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+    return false;
+
+  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
+  // we iterate through each back edge of the loop with header MBB, and check
+  // whether its metadata contains llvm.loop.unroll.disable.
+  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
+    const MachineBasicBlock *PMBB = *I;
+    if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
+      // Edges from other loops to MBB are not back edges.
+      continue;
+    }
+    if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
+      if (const MDNode *LoopID =
+              PBB->getTerminator()->getMetadata("llvm.loop")) {
+        if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+  AsmPrinter::EmitBasicBlockStart(MBB);
+  if (isLoopHeaderOfNoUnroll(MBB))
+    OutStreamer.EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+}
+
 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   SmallString<128> Str;
   raw_svector_ostream O(Str);
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -234,44 +234,27 @@
 // Returns the loop hint metadata node with the given name (for example,
 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
 // returned.
-static const MDNode *GetUnrollMetadata(const Loop *L, StringRef Name) {
+static const MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) {
   MDNode *LoopID = L->getLoopID();
   if (!LoopID)
     return nullptr;
-
-  // First operand should refer to the loop id itself.
-  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
-  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
-  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
-    const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
-    if (!MD)
-      continue;
-
-    const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
-    if (!S)
-      continue;
-
-    if (Name.equals(S->getString()))
-      return MD;
-  }
-  return nullptr;
+  return GetUnrollMetadata(LoopID, Name);
 }
 
 // Returns true if the loop has an unroll(full) pragma.
 static bool HasUnrollFullPragma(const Loop *L) {
-  return GetUnrollMetadata(L, "llvm.loop.unroll.full");
+  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
 }
 
 // Returns true if the loop has an unroll(disable) pragma.
 static bool HasUnrollDisablePragma(const Loop *L) {
-  return GetUnrollMetadata(L, "llvm.loop.unroll.disable");
+  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
 }
 
 // If loop has an unroll_count pragma return the (necessarily
 // positive) value from the pragma. Otherwise return 0.
 static unsigned UnrollCountPragmaValue(const Loop *L) {
-  const MDNode *MD = GetUnrollMetadata(L, "llvm.loop.unroll.count");
+  const MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
   if (MD) {
     assert(MD->getNumOperands() == 2 &&
            "Unroll count hint metadata should have two operands.");
Index: lib/Transforms/Utils/LoopUnroll.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnroll.cpp
+++ lib/Transforms/Utils/LoopUnroll.cpp
@@ -549,3 +549,26 @@
 
   return true;
 }
+
+/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
+/// node with the given name (for example, "llvm.loop.unroll.count"). If no
+/// such metadata node exists, then nullptr is returned.
+const MDNode *llvm::GetUnrollMetadata(const MDNode *LoopID, StringRef Name) {
+  // First operand should refer to the loop id itself.
+  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+    const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+    if (!MD || MD->getNumOperands() == 0)
+      continue;
+
+    const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+    if (!S)
+      continue;
+
+    if (Name.equals(S->getString()))
+      return MD;
+  }
+  return nullptr;
+}
Index: test/CodeGen/NVPTX/nounroll.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NVPTX/nounroll.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; Compiled from the following CUDA code:
+;
+;   #pragma nounroll
+;   for (int i = 0; i < 2; ++i)
+;     output[i] = input[i];
+define void @nounroll(float* %input, float* %output) {
+; CHECK-LABEL: .visible .func nounroll(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: .pragma "nounroll"
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = sext i32 %i.06 to i64
+  %arrayidx = getelementptr inbounds float* %input, i64 %idxprom
+  %0 = load float* %arrayidx, align 4
+; CHECK: ld.f32
+  %arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom
+  store float %0, float* %arrayidx2, align 4
+; CHECK: st.f32
+  %inc = add nuw nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+; CHECK-NOT: ld.f32
+; CHECK-NOT: st.f32
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable"}