Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -1315,6 +1315,10 @@
   if (StackAllocMI == nullptr)
     return;
 
+  bool NeedSaveSP = hasFP(MF);
+  bool NeedSaveArg = PrologMBB.isLiveIn(SystemZ::R3D);
+  const int64_t SaveSlotR3 = 2192;
+
   MachineBasicBlock &MBB = PrologMBB;
   const DebugLoc DL = StackAllocMI->getDebugLoc();
 
@@ -1334,7 +1338,25 @@
   // BASR r3,r3
   BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT))
       .addReg(SystemZ::R3D);
-
+  if (NeedSaveArg) {
+    if (!NeedSaveSP) {
+      // LGR r0,r3 (preserve r3; the LLGT below overwrites it)
+      BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LGR))
+          .addReg(SystemZ::R0D, RegState::Define)
+          .addReg(SystemZ::R3D);
+    } else {
+      // In this case, the incoming value of r4 is saved in r0 so the
+      // latter register is unavailable. Store r3 in its corresponding
+      // slot in the parameter list instead. Do this at the start of
+      // the prolog before r4 is manipulated by anything else.
+      // STG r3, 2192(r4)
+      BuildMI(MBB, MBB.begin(), DL, ZII->get(SystemZ::STG))
+          .addReg(SystemZ::R3D)
+          .addReg(SystemZ::R4D)
+          .addImm(SaveSlotR3)
+          .addReg(0);
+    }
+  }
   // LLGT r3,1208
   BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D)
       .addReg(0)
@@ -1355,6 +1377,28 @@
   NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB);
   MBB.addSuccessor(NextMBB);
   MBB.addSuccessor(StackExtMBB);
+  if (NeedSaveArg) {
+    if (!NeedSaveSP) {
+      // LGR r3, r0
+      BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LGR))
+          .addReg(SystemZ::R3D, RegState::Define)
+          .addReg(SystemZ::R0D, RegState::Kill);
+    } else {
+      // In this case r0 holds the incoming value of r4, so r3 was stored in
+      // its slot in the parameter list instead. Copy the incoming r4 from r0
+      // into r3 and use it as the base to reload r3 from that slot.
+      // LGR r3, r0
+      BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LGR))
+          .addReg(SystemZ::R3D, RegState::Define)
+          .addReg(SystemZ::R0D);
+      // LG r3, 2192(r3)
+      BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LG))
+          .addReg(SystemZ::R3D, RegState::Define)
+          .addReg(SystemZ::R3D)
+          .addImm(SaveSlotR3)
+          .addReg(0);
+    }
+  }
 
   // Add jump back from stack extension BB.
   BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB);
Index: llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
+++ llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
@@ -319,12 +319,57 @@
 ; CHECK64: lg  3, 72(3)
 ; CHECK64: basr  3, 3
 ; CHECK64: stmg  6, 7, 2064(4)
-define void @large_stack() {
+define void @large_stack0() {
   %arr = alloca [131072 x i64], align 8
   call i64 (ptr) @fun1(ptr %arr)
   ret void
 }
 
+; CHECK-LABEL: large_stack1
+; CHECK64: agfi	4, -1048768
+; CHECK64: lgr	0, 3
+; CHECK64: llgt	3, 1208
+; CHECK64: cg	4, 64(3)
+; CHECK64: jhe	@BB7_2
+; CHECK64: %bb.1:
+; CHECK64: lg	3, 72(3)
+; CHECK64: basr	3, 3
+; CHECK64: bcr	0, 7
+; CHECK64: @BB7_2:
+; CHECK64: stmg	6, 7, 2064(4)
+; CHECK64: lgr	3, 0
+define void @large_stack1(i64 %n1, i64 %n2, i64 %n3) {
+  %arr = alloca [131072 x i64], align 8
+  call i64 (ptr, i64, i64, i64) @fun3(ptr %arr,
+            i64 %n1, i64 %n2, i64 %n3)
+  ret void
+}
+
+
+; CHECK-LABEL: large_stack2
+; CHECK64: lgr	0, 4
+; CHECK64: stg	3, 2192(4)
+; CHECK64: agfi	4, -1048768
+; CHECK64: llgt	3, 1208
+; CHECK64: cg	4, 64(3)
+; CHECK64: jhe	@BB8_2
+; CHECK64: %bb.1:
+; CHECK64: lg	3, 72(3)
+; CHECK64: basr	3, 3
+; CHECK64: bcr	0, 7
+; CHECK64: @BB8_2:
+; CHECK64: lgr	3, 0
+; CHECK64: lg	3, 2192(3)
+; CHECK64: stmg	4, 11, 2048(4)
+; CHECK64: lgr	8, 4
+define void @large_stack2(i64 %n1, i64 %n2, i64 %n3) {
+  %arr0 = alloca [131072 x i64], align 8
+  %arr1 = alloca i64, i64 %n1, align 8
+  call i64 (ptr, ptr, i64, i64, i64) @fun4(ptr %arr0,
+            ptr %arr1, i64 %n1, i64 %n2, i64 %n3)
+  ret void
+}
+
 ; CHECK-LABEL: leaf_func
 ; CHECK-NOT: aghi  4,
 ; CHECK-NOT: stmg
@@ -343,3 +388,5 @@
 declare i64 @fun(i64 %arg0)
 declare i64 @fun1(ptr %ptr)
 declare i64 @fun2(i64 %n, ptr %arr0, ptr %arr1)
+declare i64 @fun3(ptr %ptr, i64 %n1, i64 %n2, i64 %n3)
+declare i64 @fun4(ptr %ptr0, ptr %ptr1, i64 %n1, i64 %n2, i64 %n3)