diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -881,7 +881,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, - bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) { + bool NeedsDwarfCFI, bool NeedsWinCFI, bool *HasWinCFI, + bool InProlog = true) { // Ignore instructions that do not operate on SP, i.e. shadow call stack // instructions and associated CFI instruction. while (MBBI->getOpcode() == AArch64::STRXpost || @@ -978,6 +979,13 @@ MIB.setMIFlags(MBBI->getFlags()); MIB.setMemRefs(MBBI->memoperands()); + if (NeedsDwarfCFI && !InProlog) { + unsigned CFIIndex = MBB.getParent()->addFrameInst( + MCCFIInstruction::createAdjustCfaOffset(nullptr, -CSStackSizeInc)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + // Generate a new SEH code that corresponds to the new instruction. if (NeedsWinCFI) { *HasWinCFI = true; @@ -990,12 +998,12 @@ // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. -static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, +static bool fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI, bool *HasWinCFI) { if (AArch64InstrInfo::isSEHInstruction(MI)) - return; + return false; unsigned Opc = MI.getOpcode(); @@ -1005,7 +1013,7 @@ Opc == AArch64::CFI_INSTRUCTION) { if (Opc != AArch64::CFI_INSTRUCTION) assert(MI.getOperand(0).getReg() != AArch64::SP); - return; + return false; } unsigned Scale; @@ -1047,6 +1055,7 @@ "Expecting a SEH instruction"); fixupSEHOpcode(MBBI, LocalStackSize); } + return true; } static void adaptForLdStOpt(MachineBasicBlock &MBB, @@ -1246,7 +1255,7 @@ NumBytes -= PrologueSaveSize; } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( - MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI); + MBB, MBBI, DL, TII, -PrologueSaveSize, false, NeedsWinCFI, &HasWinCFI); NumBytes -= PrologueSaveSize; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); @@ -1676,6 +1685,10 @@ int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); AArch64FunctionInfo *AFI = MF.getInfo(); + // TODO Also emit CFI when SVECalleeSavedStackSize != 0. + bool NeedsDwarfCFI = MF.getTarget().getTargetTriple().isOSBinFormatELF() && + MF.getFunction().needsUnwindTableEntry() && + AFI->getSVECalleeSavedStackSize() == 0; // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -1759,10 +1772,11 @@ // If the offset is 0 and the AfterCSR pop is not actually trying to // allocate more stack for arguments (in space that an untimely interrupt // may clobber), convert it to a post-index ldp. - if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) - convertCalleeSaveRestoreToSPPrePostIncDec( - MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false); - else { + if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) { + convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII, + PrologueSaveSize, NeedsDwarfCFI, + NeedsWinCFI, &HasWinCFI, false); + } else { // If not, make sure to emit an add after the last ldp. 
// We're doing this by transfering the size to be restored from the // adjustment *before* the CSR pops to the adjustment *after* the CSR @@ -1771,6 +1785,7 @@ } } + bool EmitCfi = NeedsDwarfCFI && !hasFP(MF); // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. @@ -1782,9 +1797,17 @@ IsSVECalleeSave(LastPopI)) { ++LastPopI; break; - } else if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), - NeedsWinCFI, &HasWinCFI); + } else if (CombineSPBump) { + if (fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), + NeedsWinCFI, &HasWinCFI) && + EmitCfi) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createAdjustCfaOffset( + nullptr, AFI->getLocalStackSize())); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } } if (MF.hasWinCFI()) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll @@ -14,6 +14,7 @@ ; CHECK-NEXT: str w8, [sp] ; CHECK-NEXT: bl byval_i32 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret call void @byval_i32(i32* byval(i32) %incoming) diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll --- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -28,6 +28,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #10 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -61,6 +62,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, @@ -105,6 +107,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #6 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -138,6 +141,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, @@ -184,6 +188,7 @@ ; CHECK-NEXT: mov w8, #-6 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -217,6 +222,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, @@ -261,6 +267,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #6 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -294,6 +301,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 
8-byte Folded Reload ; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, @@ -338,6 +346,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #10 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -371,6 +380,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, @@ -417,6 +427,7 @@ ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -450,6 +461,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, @@ -497,6 +509,7 @@ ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -530,6 +543,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg @@ -577,6 +591,7 @@ ; CHECK-NEXT: mov w8, #6 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -610,6 +625,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg @@ -656,6 +672,7 @@ ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -689,6 +706,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll --- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -39,6 +39,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_acq ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap: @@ -90,6 +91,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_acq_rel ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_seqcst: @@ -141,6 +143,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_rel ; OUTLINE-NEXT: ldr x30, [sp], #16 
// 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_release: @@ -192,6 +195,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_relax ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_monotonic: diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll --- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -89,6 +89,7 @@ ; CHECK-NEXT: adrp x8, var64 ; CHECK-NEXT: str x0, [x8, :lo12:var64] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -120,6 +121,7 @@ ; CHECK-NEXT: adrp x8, var64 ; CHECK-NEXT: str x0, [x8, :lo12:var64] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -150,6 +152,7 @@ ; CHECK-NEXT: bl __floatditf ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %src32 = load i32, i32* @var32 @@ -180,6 +183,7 @@ ; CHECK-NEXT: bl __floatunditf ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %src32 = load i32, i32* @var32 @@ -207,6 +211,7 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, le ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -233,6 +238,7 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, gt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -265,6 +271,7 @@ ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: orr w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -293,10 +300,12 @@ ; CHECK-NEXT: // %bb.1: // %iftrue ; CHECK-NEXT: mov w0, #42 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_2: // %iffalse ; CHECK-NEXT: mov w0, #29 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -355,6 +364,7 @@ ; CHECK-NEXT: adrp x8, vardouble ; CHECK-NEXT: str d0, [x8, :lo12:vardouble] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -393,6 +403,7 @@ ; CHECK-NEXT: bl __extenddftf2 ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -185,6 +185,7 @@ ; CHECK-NEXT: bl use_4xi1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, 
#16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -444,6 +444,7 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -48 ; CHECK-NEXT: ret entry: %size = getelementptr inbounds %struct.Struct, %struct.Struct* %hdCall, i64 0, i32 0 @@ -513,6 +514,7 @@ ; CHECK-NEXT: .LBB7_8: // %return ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 @@ -589,10 +591,12 @@ ; CHECK-NEXT: // %bb.5: ; CHECK-NEXT: mov w0, #123 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_6: // %if.end ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 @@ -676,6 +680,7 @@ ; CHECK-NEXT: .LBB9_4: // %return ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret ; CHECK-LABEL-DAG: .LBB9_3 @@ -728,6 +733,7 @@ ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: bl zoo ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; [...] 
diff --git a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -193,6 +193,7 @@ ; CHECK-NEXT: cbnz w19, .LBB9_2 ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_2: // %if.then ; CHECK-NEXT: bl foo diff --git a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll --- a/llvm/test/CodeGen/AArch64/csr-split.ll +++ b/llvm/test/CodeGen/AArch64/csr-split.ll @@ -19,12 +19,14 @@ ; CHECK-NEXT: b.eq .LBB0_2 ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %if.then ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: b callNonVoid ; ; CHECK-APPLE-LABEL: test1: @@ -92,11 +94,13 @@ ; CHECK-NEXT: .LBB1_2: // %return ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_3: // %if.then2 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: b callNonVoid ; ; CHECK-APPLE-LABEL: test2: @@ -171,6 +175,7 @@ ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret ; ; CHECK-APPLE-LABEL: test3: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -276,6 +276,7 @@ ; CHECK-NEXT: csel w8, wzr, w19, ne ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f128.v1i32(<1 x fp128> %f) @@ -345,6 +346,9 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f128.v2i32(<2 x fp128> %f) @@ -435,6 +439,9 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: mov v0.s[2], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f128.v3i32(<3 x fp128> %f) @@ -545,6 +552,9 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: add sp, sp, #144 ; CHECK-NEXT: ret %x = call <4 x 
i32> @llvm.fptosi.sat.v4f128.v4i32(<4 x fp128> %f) @@ -1068,6 +1078,11 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float> %f) @@ -1135,6 +1150,11 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float> %f) @@ -1396,6 +1416,11 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f) @@ -1462,6 +1487,11 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f) @@ -1927,6 +1957,13 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half> %f) @@ -2035,6 +2072,13 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half> %f) diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -269,6 +269,7 @@ ; CHECK-NEXT: csinv w8, w19, wzr, le ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f) @@ -322,6 +323,8 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 +; CHECK-NEXT: .cfi_adjust_cfa_offset 
64 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f) @@ -390,6 +393,8 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: mov v0.s[2], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f) @@ -474,6 +479,8 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f128.v4i32(<4 x fp128> %f) @@ -951,6 +958,9 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f) @@ -1001,6 +1011,9 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f) @@ -1218,6 +1231,9 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f) @@ -1267,6 +1283,9 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f) @@ -1656,6 +1675,11 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f) @@ -1741,6 +1765,11 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f) diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll --- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -35,6 +35,7 @@ ; CHECK-NEXT: .LBB0_3: // %common.ret ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/AArch64/local_vars.ll b/llvm/test/CodeGen/AArch64/local_vars.ll --- a/llvm/test/CodeGen/AArch64/local_vars.ll +++ b/llvm/test/CodeGen/AArch64/local_vars.ll @@ -39,9 +39,11 @@ ret void ; CHECK: ldr x30, [sp], #16 +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-WITHFP-ARM64: ldp x29, x30, [sp], #16 +; CHECK-WITHFP-ARM64-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-WITHFP-ARM64-NEXT: ret } diff --git a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll --- a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -56,6 +56,7 @@ ; CHECK-NEXT: str w19, [x8] ; CHECK-NEXT: str w9, [x8] ; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %val1 = load i32, i32* @var1_32 %val2 = load i32, i32* @var2_32 @@ -176,6 +177,7 @@ ; CHECK-NEXT: str x19, [x8] ; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %val1 = load i64, i64* @var1_64 %val2 = load i64, i64* @var2_64 diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll --- a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll @@ -36,6 +36,7 @@ ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 @@ -92,6 +93,7 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 @@ -149,6 +151,7 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll --- a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll @@ -12,9 +12,10 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl [[OUTLINED_DIRECT:OUTLINED_FUNCTION_[0-9]+]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: add w0, w0, #8 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) @@ -28,9 +29,10 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl [[OUTLINED_DIRECT]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: add w0, w0, #88 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) @@ -44,9 +46,10 @@ ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl [[OUTLINED_INDIRECT:OUTLINED_FUNCTION_[0-9]+]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: add w0, w0, #8 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) @@ -60,9 +63,10 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl [[OUTLINED_INDIRECT]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: add w0, w0, #88 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -45,6 +45,7 @@ ; A53-NEXT: adrp x8, gv1 ; A53-NEXT: str x0, [x8, :lo12:gv1] ; A53-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; A53-NEXT: .cfi_adjust_cfa_offset -16 ; A53-NEXT: ret ; A53-NEXT: .LBB0_4: // %while.body.i.split ; A53-NEXT: // =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll --- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll +++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll @@ -24,12 +24,14 @@ ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: and w0, w8, #0x100 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: cbz w0, .LBB0_5 ; CHECK-NEXT: .LBB0_3: // %common.ret.sink.split ; CHECK-NEXT: b extfunc ; CHECK-NEXT: .LBB0_4: // %b2 ; CHECK-NEXT: bl extfunc ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: cbnz w0, .LBB0_3 ; CHECK-NEXT: .LBB0_5: // %common.ret ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -166,6 +166,7 @@ ; CHECK-NEXT: stp q0, q2, [x8, #464] ; CHECK-NEXT: str q1, [x8, #496] ; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -64 ; CHECK-NEXT: ret entry: br label %for.cond1.preheader diff --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll --- a/llvm/test/CodeGen/AArch64/shift_minsize.ll +++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll @@ -16,6 +16,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: lsl x0, x0, x1 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: f0: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: lsl x0, x0, x1 +; CHECK-DARWIN-NEXT: ret %res = shl i64 %val, %amt ret i64 %res } @@ -26,6 +31,12 @@ ; CHECK-NEXT: lsl x0, x0, x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: f1: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: lsl x0, x0, x1 +; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-DARWIN-NEXT: ret %a = shl i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -37,6 +48,12 @@ ; CHECK-NEXT: asr x0, x0, x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; 
CHECK-DARWIN-LABEL: f2: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: asr x0, x0, x1 +; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-DARWIN-NEXT: ret %a = ashr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -48,6 +65,12 @@ ; CHECK-NEXT: lsr x0, x0, x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: f3: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: lsr x0, x0, x1 +; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-DARWIN-NEXT: ret %a = lshr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -62,7 +85,22 @@ ; CHECK-NEXT: mov w2, w2 ; CHECK-NEXT: bl __ashlti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: shl128: +; CHECK-DARWIN: ; %bb.0: ; %entry +; CHECK-DARWIN-NEXT: mvn w8, w2 +; CHECK-DARWIN-NEXT: lsr x9, x0, #1 +; CHECK-DARWIN-NEXT: mov w10, w2 +; CHECK-DARWIN-NEXT: lsr x8, x9, x8 +; CHECK-DARWIN-NEXT: lsl x9, x1, x10 +; CHECK-DARWIN-NEXT: lsl x11, x0, x10 +; CHECK-DARWIN-NEXT: tst x10, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x9, x8 +; CHECK-DARWIN-NEXT: csel x1, x11, x8, ne +; CHECK-DARWIN-NEXT: csel x0, xzr, x11, ne +; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 @@ -89,7 +127,23 @@ ; CHECK-NEXT: mov w2, w2 ; CHECK-NEXT: bl __ashrti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: ashr128: +; CHECK-DARWIN: ; %bb.0: ; %entry +; CHECK-DARWIN-NEXT: mov w8, w2 +; CHECK-DARWIN-NEXT: mvn w9, w2 +; CHECK-DARWIN-NEXT: lsl x10, x1, #1 +; CHECK-DARWIN-NEXT: lsl x9, x10, x9 +; CHECK-DARWIN-NEXT: lsr x11, x0, x8 +; CHECK-DARWIN-NEXT: asr x10, x1, #63 +; CHECK-DARWIN-NEXT: asr x12, x1, x8 +; CHECK-DARWIN-NEXT: tst x8, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x9, x11 +; CHECK-DARWIN-NEXT: csel x0, x12, x8, ne +; CHECK-DARWIN-NEXT: csel x1, x10, x12, ne +; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 @@ -115,7 +169,22 @@ ; CHECK-NEXT: mov w2, w2 ; CHECK-NEXT: bl __lshrti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: lshr128: +; CHECK-DARWIN: ; %bb.0: ; %entry +; CHECK-DARWIN-NEXT: mov w8, w2 +; CHECK-DARWIN-NEXT: mvn w9, w2 +; CHECK-DARWIN-NEXT: lsl x10, x1, #1 +; CHECK-DARWIN-NEXT: lsr x11, x0, x8 +; CHECK-DARWIN-NEXT: lsl x9, x10, x9 +; CHECK-DARWIN-NEXT: lsr x10, x1, x8 +; CHECK-DARWIN-NEXT: tst x8, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x9, x11 +; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne +; CHECK-DARWIN-NEXT: csel x1, xzr, x10, ne +; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -154,6 +154,7 @@ ; CHECK-NEXT: and w0, w19, #0x1 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)] diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -522,8 +522,9 @@ ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 ; CHECK-NEXT: cmpne p1.b, p1/z, z1.b, #0 -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -226,8 +226,9 @@ ; CHECK-NEXT: st1d { z1.d }, p0, [x0, #2, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] ; CHECK-NEXT: st1d { z3.d }, p0, [x0] +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %v0 = call @llvm.experimental.vector.insert.v2i64.nxv16i64( undef, <2 x i64> %sv0, i64 0) %v = call @llvm.experimental.vector.insert.v2i64.nxv16i64( %v0, <2 x i64> %sv1, i64 4) @@ -263,8 +264,9 @@ ; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [x1, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x1] +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv %v = call @llvm.experimental.vector.insert.v2i64.nxv16i64( undef, <2 x i64> %sv, i64 2) diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -34,8 +34,9 @@ ; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldrb w0, [x8, x9] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i8 %ext @@ -59,8 +60,9 @@ ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldrh w0, [x8, x9, lsl #1] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i16 %ext @@ -84,8 +86,9 @@ ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldr w0, [x8, x9, lsl #2] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i32 %ext @@ -111,8 +114,9 @@ ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] -; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, 
sp, #4 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i64 %ext @@ -155,8 +159,9 @@ ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: ldrh w0, [x8, x9, lsl #1] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ext = extractelement %a, i32 128 ret i16 %ext @@ -182,8 +187,9 @@ ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: ldr w0, [x8, x9, lsl #2] -; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ret %ext = extractelement %a, i32 100000 ret i32 %ext @@ -206,8 +212,9 @@ ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ext = extractelement %a, i32 10 ret i64 %ext diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -34,8 +34,9 @@ ; CHECK-NEXT: strb w0, [x9, x8] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ins = insertelement %a, i8 %elt, i64 %idx ret %ins @@ -59,8 +60,9 @@ ; CHECK-NEXT: str s2, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ins = insertelement %a, float %elt, i64 %idx ret %ins @@ -88,8 +90,9 @@ ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x9, #2, mul vl] ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9, #3, mul vl] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ret %ins = insertelement %a, i64 %elt, i64 %idx ret %ins @@ -151,8 +154,9 @@ ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x8, #2, mul vl] ; CHECK-NEXT: ld1h { z3.h }, p0/z, [x8, #3, mul vl] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] -; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ret %ins = insertelement %a, i16 %elt, i64 128 ret %ins @@ -178,8 +182,9 @@ ; CHECK-NEXT: str w0, [x8, x9, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] -; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ret %ins = insertelement %a, i32 %elt, i64 1000000 ret %ins diff --git a/llvm/test/CodeGen/AArch64/sve-varargs.ll b/llvm/test/CodeGen/AArch64/sve-varargs.ll --- a/llvm/test/CodeGen/AArch64/sve-varargs.ll +++ b/llvm/test/CodeGen/AArch64/sve-varargs.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: add x0, 
x0, :lo12:.str_1 ; CHECK-NEXT: bl sve_printf ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %f = getelementptr [6 x i8], [6 x i8]* @.str_1, i64 0, i64 0 call i32 (i8*, , ...) @sve_printf(i8* %f, %x)
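
Note on the mechanics, restating what the patch itself adds rather than a separate implementation: when an epilogue callee-save reload such as "ldp x30, x19, [sp], #16" is rewritten by convertCalleeSaveRestoreToSPPrePostIncDec into a post-indexed form, the SP bump it performs also moves the CFA while the CFA is still defined relative to SP, so a .cfi_adjust_cfa_offset must follow the converted instruction; similarly, when fixupCalleeSaveRestoreStackOffset folds the local stack size into a combined SP bump, the epilogue path now compensates with an adjustment of AFI->getLocalStackSize(). The core of the emission, copied from the convertCalleeSaveRestoreToSPPrePostIncDec hunk above, is:

    // Emitted only for ELF/DWARF unwind info (NeedsDwarfCFI) and only in the
    // epilogue (!InProlog). There, CSStackSizeInc is the positive amount the
    // post-indexed reload adds back to SP, so the CFA offset relative to SP
    // shrinks by that amount and the adjustment is -CSStackSizeInc.
    if (NeedsDwarfCFI && !InProlog) {
      unsigned CFIIndex = MBB.getParent()->addFrameInst(
          MCCFIInstruction::createAdjustCfaOffset(nullptr, -CSStackSizeInc));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

The test updates add the matching .cfi_adjust_cfa_offset expectations (negative after post-indexed "ldr/ldp ..., [sp], #N" reloads, positive by the local stack size ahead of the final "add sp, sp, #N" when the SP bumps are combined), and the SVE tests are reordered so that the reload and its CFI directive come before the "addvl sp" that deallocates the SVE area.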