diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -33,7 +33,7 @@
                                       MachineBasicBlock &MBB) const {
   if (MF.getFrameInfo().hasStackObjects()) {
     assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
-    MachineInstr *MI = &MBB.front();
+    MachineBasicBlock::iterator MBBI = MBB.begin();
     MachineRegisterInfo &MR = MF.getRegInfo();
 
     const NVPTXRegisterInfo *NRI =
@@ -55,12 +55,13 @@
         (Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
     if (!MR.use_empty(NRI->getFrameRegister(MF))) {
       // If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
-      MI = BuildMI(MBB, MI, dl,
-                   MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
-                   NRI->getFrameRegister(MF))
-               .addReg(NRI->getFrameLocalRegister(MF));
+      MBBI = BuildMI(MBB, MBBI, dl,
+                     MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
+                     NRI->getFrameRegister(MF))
+                 .addReg(NRI->getFrameLocalRegister(MF));
     }
-    BuildMI(MBB, MI, dl, MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
+    BuildMI(MBB, MBBI, dl,
+            MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
             NRI->getFrameLocalRegister(MF))
         .addImm(MF.getFunctionNumber());
   }
diff --git a/llvm/test/CodeGen/NVPTX/bug52623.ll b/llvm/test/CodeGen/NVPTX/bug52623.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/bug52623.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_75 -verify-machineinstrs
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_75 | %ptxas-verify %}
+
+; Check that llc does not crash even when the first MBB does not contain
+; any instructions.
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64"
+
+%printf_args.0.8 = type { i8* }
+
+define internal i32 @__kmpc_get_hardware_thread_id_in_block(i1 %0) { ; reduced reproducer for the llc crash described in this test's header comment
+  %2 = alloca %printf_args.0.8, i32 0, align 8 ; element-count operand is 0
+  %3 = bitcast %printf_args.0.8* %2 to i8* ; only use of the alloca; passed to the call below
+  br i1 true, label %._crit_edge1, label %._crit_edge ; constant-true condition: entry always takes the %._crit_edge1 edge
+
+._crit_edge:                                      ; preds = %1, %._crit_edge
+  %4 = call i32 null(i8* null, i8* %3) ; indirect call whose callee is the null constant
+  br i1 %0, label %._crit_edge, label %._crit_edge1 ; loops back to itself while %0 is true
+
+._crit_edge1:                                     ; preds = %._crit_edge, %1
+  ret i32 0
+
+; uselistorder directives
+  uselistorder label %._crit_edge, { 1, 0 }
+}