diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -406,8 +406,7 @@
 }
 
 // Return true if MBB is the header of a loop marked with
-// llvm.loop.unroll.disable.
-// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
+// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
     const MachineBasicBlock &MBB) const {
   MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
@@ -428,6 +427,14 @@
               PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
         if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
           return true;
+        // "#pragma unroll 1" is emitted as llvm.loop.unroll.count with a count
+        // of 1, which is equivalent to "#pragma nounroll".
+        if (MDNode *UnrollCountMD =
+                GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
+          if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
+                  ->getZExtValue() == 1)
+            return true;
+        }
       }
     }
   }
diff --git a/llvm/test/CodeGen/NVPTX/nounroll.ll b/llvm/test/CodeGen/NVPTX/nounroll.ll
--- a/llvm/test/CodeGen/NVPTX/nounroll.ll
+++ b/llvm/test/CodeGen/NVPTX/nounroll.ll
@@ -34,5 +34,37 @@
   ret void
 }
 
+; Compiled from the following CUDA code:
+;
+;   #pragma unroll 1
+;   for (int i = 0; i < 2; ++i)
+;     output[i] = input[i];
+define void @unroll1(float* %input, float* %output) {
+; CHECK-LABEL: .visible .func unroll1(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: .pragma "nounroll"
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = sext i32 %i.06 to i64
+  %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+; CHECK: ld.f32
+  %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
+  store float %0, float* %arrayidx2, align 4
+; CHECK: st.f32
+  %inc = add nuw nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
+; CHECK-NOT: ld.f32
+; CHECK-NOT: st.f32
+
+for.end:
+  ret void
+}
+
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.unroll.disable"}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.unroll.count", i32 1}