diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -881,7 +881,8 @@
 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
-    bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
+    bool NeedsDwarfCFI, bool NeedsWinCFI, bool *HasWinCFI,
+    bool InProlog = true) {
   // Ignore instructions that do not operate on SP, i.e. shadow call stack
   // instructions and associated CFI instruction.
   while (MBBI->getOpcode() == AArch64::STRXpost ||
@@ -978,6 +979,13 @@
   MIB.setMIFlags(MBBI->getFlags());
   MIB.setMemRefs(MBBI->memoperands());
 
+  if (NeedsDwarfCFI && !InProlog) {
+    unsigned CFIIndex = MBB.getParent()->addFrameInst(
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, -CSStackSizeInc));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
+
   // Generate a new SEH code that corresponds to the new instruction.
   if (NeedsWinCFI) {
     *HasWinCFI = true;
@@ -990,12 +998,12 @@
 
 // Fixup callee-save register save/restore instructions to take into account
 // combined SP bump by adding the local stack size to the stack offsets.
-static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
+static bool fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                               uint64_t LocalStackSize,
                                               bool NeedsWinCFI, bool *HasWinCFI) {
   if (AArch64InstrInfo::isSEHInstruction(MI))
-    return;
+    return false;
 
   unsigned Opc = MI.getOpcode();
@@ -1005,7 +1013,7 @@
       Opc == AArch64::CFI_INSTRUCTION) {
     if (Opc != AArch64::CFI_INSTRUCTION)
       assert(MI.getOperand(0).getReg() != AArch64::SP);
-    return;
+    return false;
   }
 
   unsigned Scale;
@@ -1047,6 +1055,7 @@
            "Expecting a SEH instruction");
     fixupSEHOpcode(MBBI, LocalStackSize);
   }
+  return true;
 }
 
 static void adaptForLdStOpt(MachineBasicBlock &MBB,
@@ -1246,7 +1255,7 @@
     NumBytes -= PrologueSaveSize;
   } else if (PrologueSaveSize != 0) {
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
+        MBB, MBBI, DL, TII, -PrologueSaveSize, false, NeedsWinCFI, &HasWinCFI);
     NumBytes -= PrologueSaveSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -1676,6 +1685,10 @@
   int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
                                : MFI.getStackSize();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  // TODO Also emit CFI when SVECalleeSavedStackSize != 0.
+  bool NeedsDwarfCFI = MF.getTarget().getTargetTriple().isOSBinFormatELF() &&
+                       MF.getFunction().needsUnwindTableEntry() &&
+                       AFI->getSVECalleeSavedStackSize() == 0;
 
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
@@ -1759,10 +1772,11 @@
       // If the offset is 0 and the AfterCSR pop is not actually trying to
       // allocate more stack for arguments (in space that an untimely interrupt
       // may clobber), convert it to a post-index ldp.
-      if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0)
-        convertCalleeSaveRestoreToSPPrePostIncDec(
-            MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
-      else {
+      if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
+        convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
+                                                  PrologueSaveSize, NeedsDwarfCFI,
+                                                  NeedsWinCFI, &HasWinCFI, false);
+      } else {
         // If not, make sure to emit an add after the last ldp.
// We're doing this by transfering the size to be restored from the // adjustment *before* the CSR pops to the adjustment *after* the CSR @@ -1771,6 +1785,7 @@ } } + bool EmitCfi = NeedsDwarfCFI && !hasFP(MF); // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. @@ -1778,13 +1793,25 @@ MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { --LastPopI; + // Skip CFI_INSTRUCTION added in a previous iteration. + if (LastPopI->getOpcode() == AArch64::CFI_INSTRUCTION) + continue; if (!LastPopI->getFlag(MachineInstr::FrameDestroy) || IsSVECalleeSave(LastPopI)) { ++LastPopI; break; - } else if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), - NeedsWinCFI, &HasWinCFI); + } else if (CombineSPBump) { + if (fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), + NeedsWinCFI, &HasWinCFI) && + EmitCfi) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createAdjustCfaOffset( + nullptr, AFI->getLocalStackSize())); + BuildMI(MBB, std::next(LastPopI), DL, + TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } } if (MF.hasWinCFI()) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll @@ -14,6 +14,7 @@ ; CHECK-NEXT: str w8, [sp] ; CHECK-NEXT: bl byval_i32 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret call void @byval_i32(i32* byval(i32) %incoming) diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll --- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -28,6 +28,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #10 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -61,6 +62,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, @@ -105,6 +107,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: add w0, w19, #6 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -138,6 +141,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, @@ -184,6 +188,7 @@ ; CHECK-NEXT: mov w8, #-6 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = add i32 %arg, 8 call void @use(i32 %t0) @@ -217,6 +222,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = add <4 x i32> %arg, @@ -261,6 
+267,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #6 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -294,6 +301,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, @@ -338,6 +346,7 @@ ; CHECK-NEXT: bl use ; CHECK-NEXT: sub w0, w19, #10 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -371,6 +380,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, @@ -417,6 +427,7 @@ ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 %arg, 8 call void @use(i32 %t0) @@ -450,6 +461,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> %arg, @@ -497,6 +509,7 @@ ; CHECK-NEXT: mov w8, #10 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -530,6 +543,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg @@ -577,6 +591,7 @@ ; CHECK-NEXT: mov w8, #6 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -610,6 +625,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg @@ -656,6 +672,7 @@ ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %t0 = sub i32 8, %arg call void @use(i32 %t0) @@ -689,6 +706,7 @@ ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %arg diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll --- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -39,6 +39,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_acq ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap: @@ -90,6 +91,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_acq_rel ; 
OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_seqcst: @@ -141,6 +143,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_rel ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_release: @@ -192,6 +195,7 @@ ; OUTLINE-NEXT: mov x4, x8 ; OUTLINE-NEXT: bl __aarch64_cas16_relax ; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-NEXT: .cfi_adjust_cfa_offset -16 ; OUTLINE-NEXT: ret ; ; LSE-LABEL: val_compare_and_swap_monotonic: diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll --- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -89,6 +89,7 @@ ; CHECK-NEXT: adrp x8, var64 ; CHECK-NEXT: str x0, [x8, :lo12:var64] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -120,6 +121,7 @@ ; CHECK-NEXT: adrp x8, var64 ; CHECK-NEXT: str x0, [x8, :lo12:var64] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 @@ -150,6 +152,7 @@ ; CHECK-NEXT: bl __floatditf ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %src32 = load i32, i32* @var32 @@ -180,6 +183,7 @@ ; CHECK-NEXT: bl __floatunditf ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %src32 = load i32, i32* @var32 @@ -207,6 +211,7 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, le ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -233,6 +238,7 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, gt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -265,6 +271,7 @@ ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: orr w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -293,10 +300,12 @@ ; CHECK-NEXT: // %bb.1: // %iftrue ; CHECK-NEXT: mov w0, #42 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_2: // %iffalse ; CHECK-NEXT: mov w0, #29 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 @@ -355,6 +364,7 @@ ; CHECK-NEXT: adrp x8, vardouble ; CHECK-NEXT: str d0, [x8, :lo12:vardouble] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -393,6 +403,7 @@ ; CHECK-NEXT: bl __extenddftf2 ; CHECK-NEXT: str q0, [x19, :lo12:lhs] ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %val = load fp128, fp128* @lhs, align 16 diff --git 
a/llvm/test/CodeGen/AArch64/call-rv-marker.ll b/llvm/test/CodeGen/AArch64/call-rv-marker.ll --- a/llvm/test/CodeGen/AArch64/call-rv-marker.ll +++ b/llvm/test/CodeGen/AArch64/call-rv-marker.ll @@ -44,6 +44,7 @@ ; CHECK-NEXT: bl foo0 ; SELDAG-NEXT: mov x29, x29 ; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: b foo2 ; entry: diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -185,6 +185,7 @@ ; CHECK-NEXT: bl use_4xi1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -444,6 +444,7 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -48 ; CHECK-NEXT: ret entry: %size = getelementptr inbounds %struct.Struct, %struct.Struct* %hdCall, i64 0, i32 0 @@ -513,6 +514,7 @@ ; CHECK-NEXT: .LBB7_8: // %return ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 @@ -589,10 +591,12 @@ ; CHECK-NEXT: // %bb.5: ; CHECK-NEXT: mov w0, #123 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_6: // %if.end ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 @@ -676,6 +680,7 @@ ; CHECK-NEXT: .LBB9_4: // %return ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret ; CHECK-LABEL-DAG: .LBB9_3 @@ -728,6 +733,7 @@ ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: bl zoo ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; [...] 
diff --git a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -193,6 +193,7 @@ ; CHECK-NEXT: cbnz w19, .LBB9_2 ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_2: // %if.then ; CHECK-NEXT: bl foo diff --git a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll --- a/llvm/test/CodeGen/AArch64/csr-split.ll +++ b/llvm/test/CodeGen/AArch64/csr-split.ll @@ -19,12 +19,14 @@ ; CHECK-NEXT: b.eq .LBB0_2 ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %if.then ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: b callNonVoid ; ; CHECK-APPLE-LABEL: test1: @@ -92,11 +94,13 @@ ; CHECK-NEXT: .LBB1_2: // %return ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_3: // %if.then2 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: b callNonVoid ; ; CHECK-APPLE-LABEL: test2: @@ -171,6 +175,7 @@ ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret ; ; CHECK-APPLE-LABEL: test3: diff --git a/llvm/test/CodeGen/AArch64/fastcc.ll b/llvm/test/CodeGen/AArch64/fastcc.ll --- a/llvm/test/CodeGen/AArch64/fastcc.ll +++ b/llvm/test/CodeGen/AArch64/fastcc.ll @@ -161,6 +161,7 @@ ; CHECK: nop ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldr x20, [sp], #16 +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-TAIL-LABEL: func_stack32_leaf: @@ -168,6 +169,7 @@ ; CHECK-TAIL: nop ; CHECK-TAIL-NEXT: //NO_APP ; CHECK-TAIL-NEXT: ldr x20, [sp], #16 +; CHECK-TAIL-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-TAIL-NEXT: add sp, sp, #32 ; CHECK-TAIL-NEXT: ret @@ -177,6 +179,7 @@ ; CHECK-TAIL-RZ: nop ; CHECK-TAIL-RZ-NEXT: //NO_APP ; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16 +; CHECK-TAIL-RZ-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 ; CHECK-TAIL-RZ-NEXT: ret @@ -193,6 +196,7 @@ ; CHECK: nop ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldr x20, [sp, #16] +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -202,6 +206,7 @@ ; CHECK-TAIL: nop ; CHECK-TAIL-NEXT: //NO_APP ; CHECK-TAIL-NEXT: ldr x20, [sp, #16] +; CHECK-TAIL-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-TAIL-NEXT: add sp, sp, #64 ; CHECK-TAIL-NEXT: ret @@ -211,6 +216,7 @@ ; CHECK-TAIL-RZ: nop ; CHECK-TAIL-RZ-NEXT: //NO_APP ; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16 +; CHECK-TAIL-RZ-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-TAIL-RZ-NEXT: add sp, sp, #32 ; CHECK-TAIL-RZ-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -276,6 +276,7 @@ ; CHECK-NEXT: csel w8, wzr, w19, ne ; CHECK-NEXT: ldp x30, x19, [sp, 
#16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f128.v1i32(<1 x fp128> %f) @@ -339,12 +340,15 @@ ; CHECK-NEXT: bl __unordtf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csel w8, wzr, w19, ne +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: mov v0.s[1], w22 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f128.v2i32(<2 x fp128> %f) @@ -432,9 +436,12 @@ ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel w8, wzr, w19, ne ; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.s[2], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 ; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[2], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f128.v3i32(<3 x fp128> %f) @@ -542,9 +549,12 @@ ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel w8, wzr, w19, ne ; CHECK-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: add sp, sp, #144 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f128.v4i32(<4 x fp128> %f) @@ -1060,14 +1070,19 @@ ; CHECK-NEXT: csel x8, x22, x8, gt ; CHECK-NEXT: fcmp s0, s0 ; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x1, xzr, x8, vs ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float> %f) @@ -1127,14 +1142,19 @@ ; CHECK-NEXT: csel x8, x22, x8, gt ; CHECK-NEXT: fcmp s0, s0 ; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #16] // 
8-byte Folded Reload ; CHECK-NEXT: csel x1, xzr, x8, vs ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float> %f) @@ -1388,14 +1408,19 @@ ; CHECK-NEXT: csel x8, x22, x8, gt ; CHECK-NEXT: fcmp d0, d0 ; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x1, xzr, x8, vs ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f) @@ -1454,14 +1479,19 @@ ; CHECK-NEXT: csel x8, x22, x8, gt ; CHECK-NEXT: fcmp d0, d0 ; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x1, xzr, x8, vs ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f) @@ -1912,21 +1942,28 @@ ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: mov x2, x19 ; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: csel x1, xzr, x8, vs +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: mov x4, x21 ; CHECK-NEXT: mov x5, x22 ; CHECK-NEXT: mov x6, x23 ; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded 
Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half> %f) @@ -2020,21 +2057,28 @@ ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: mov x2, x19 ; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: csel x1, xzr, x8, vs +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: mov x4, x21 ; CHECK-NEXT: mov x5, x22 ; CHECK-NEXT: mov x6, x23 ; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half> %f) diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -269,6 +269,7 @@ ; CHECK-NEXT: csinv w8, w19, wzr, le ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f) @@ -320,8 +321,10 @@ ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: mov v0.s[1], w20 ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 64 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f) @@ -388,8 +391,10 @@ ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csinv w8, w19, wzr, le ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.s[2], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[2], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 80 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f) @@ -472,8 +477,10 @@ ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: csinv w8, w19, 
wzr, le ; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f128.v4i32(<4 x fp128> %f) @@ -946,11 +953,14 @@ ; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csel x1, x21, x9, gt -; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f) @@ -995,12 +1005,15 @@ ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: csinv x1, x9, xzr, le ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f) @@ -1213,11 +1226,14 @@ ; CHECK-NEXT: fcmp d0, d9 ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csel x1, x21, x9, gt -; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f) @@ -1261,12 +1277,15 @@ ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp d0, d9 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: csinv x1, x9, xzr, le ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f) @@ -1641,21 +1660,26 @@ ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: mov x4, x20 +; CHECK-NEXT: mov x5, x19 +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x1, x25, x9, gt +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: 
fmov x0, d0 ; CHECK-NEXT: mov x2, x22 ; CHECK-NEXT: mov x3, x21 -; CHECK-NEXT: mov x4, x20 -; CHECK-NEXT: mov x5, x19 ; CHECK-NEXT: mov x6, x24 ; CHECK-NEXT: mov x7, x23 -; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f) @@ -1728,19 +1752,24 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: mov x2, x19 ; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: csinv x1, x9, xzr, le +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: mov x4, x21 ; CHECK-NEXT: mov x5, x22 ; CHECK-NEXT: mov x6, x23 ; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csinv x1, x9, xzr, le -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 16 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f) diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll --- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -35,6 +35,7 @@ ; CHECK-NEXT: .LBB0_3: // %common.ret ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: .cfi_adjust_cfa_offset 96 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/local_vars.ll b/llvm/test/CodeGen/AArch64/local_vars.ll --- a/llvm/test/CodeGen/AArch64/local_vars.ll +++ b/llvm/test/CodeGen/AArch64/local_vars.ll @@ -39,9 +39,11 @@ ret void ; CHECK: ldr x30, [sp], #16 +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret ; CHECK-WITHFP-ARM64: ldp x29, x30, [sp], #16 +; CHECK-WITHFP-ARM64-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-WITHFP-ARM64-NEXT: ret } diff --git a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll --- a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -56,6 +56,7 @@ ; CHECK-NEXT: str w19, [x8] ; CHECK-NEXT: str w9, [x8] ; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %val1 = load i32, i32* @var1_32 %val2 = load i32, i32* @var2_32 @@ -176,6 +177,7 @@ ; CHECK-NEXT: str x19, [x8] ; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload +; 
CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %val1 = load i64, i64* @var1_64 %val2 = load i64, i64* @var2_64 diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll --- a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll @@ -36,6 +36,7 @@ ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 @@ -92,6 +93,7 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 @@ -149,6 +151,7 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %cmp63 = icmp sgt i32 %n, 0 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll --- a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: bl [[OUTLINED_DIRECT:OUTLINED_FUNCTION_[0-9]+]] ; CHECK-NEXT: add w0, w0, #8 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) @@ -31,6 +32,7 @@ ; CHECK-NEXT: bl [[OUTLINED_DIRECT]] ; CHECK-NEXT: add w0, w0, #88 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) @@ -47,6 +49,7 @@ ; CHECK-NEXT: bl [[OUTLINED_INDIRECT:OUTLINED_FUNCTION_[0-9]+]] ; CHECK-NEXT: add w0, w0, #8 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) @@ -63,6 +66,7 @@ ; CHECK-NEXT: bl [[OUTLINED_INDIRECT]] ; CHECK-NEXT: add w0, w0, #88 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -45,6 +45,7 @@ ; A53-NEXT: adrp x8, gv1 ; A53-NEXT: str x0, [x8, :lo12:gv1] ; A53-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; A53-NEXT: .cfi_adjust_cfa_offset -16 ; A53-NEXT: ret ; A53-NEXT: .LBB0_4: // %while.body.i.split ; A53-NEXT: // =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll --- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll +++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll @@ -24,12 +24,14 @@ ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: and w0, w8, #0x100 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: cbz w0, .LBB0_5 ; CHECK-NEXT: .LBB0_3: // %common.ret.sink.split ; CHECK-NEXT: b 
extfunc ; CHECK-NEXT: .LBB0_4: // %b2 ; CHECK-NEXT: bl extfunc ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: cbnz w0, .LBB0_3 ; CHECK-NEXT: .LBB0_5: // %common.ret ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -166,6 +166,7 @@ ; CHECK-NEXT: stp q0, q2, [x8, #464] ; CHECK-NEXT: str q1, [x8, #496] ; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -64 ; CHECK-NEXT: ret entry: br label %for.cond1.preheader diff --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll --- a/llvm/test/CodeGen/AArch64/shift_minsize.ll +++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll @@ -16,6 +16,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: lsl x0, x0, x1 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: f0: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: lsl x0, x0, x1 +; CHECK-DARWIN-NEXT: ret %res = shl i64 %val, %amt ret i64 %res } @@ -26,6 +31,12 @@ ; CHECK-NEXT: lsl x0, x0, x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: f1: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: lsl x0, x0, x1 +; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-DARWIN-NEXT: ret %a = shl i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -37,6 +48,12 @@ ; CHECK-NEXT: asr x0, x0, x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: f2: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: asr x0, x0, x1 +; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-DARWIN-NEXT: ret %a = ashr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -48,6 +65,12 @@ ; CHECK-NEXT: lsr x0, x0, x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: f3: +; CHECK-DARWIN: ; %bb.0: +; CHECK-DARWIN-NEXT: lsr x0, x0, x1 +; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-DARWIN-NEXT: ret %a = lshr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -62,7 +85,22 @@ ; CHECK-NEXT: mov w2, w2 ; CHECK-NEXT: bl __ashlti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: shl128: +; CHECK-DARWIN: ; %bb.0: ; %entry +; CHECK-DARWIN-NEXT: mvn w8, w2 +; CHECK-DARWIN-NEXT: lsr x9, x0, #1 +; CHECK-DARWIN-NEXT: mov w10, w2 +; CHECK-DARWIN-NEXT: lsr x8, x9, x8 +; CHECK-DARWIN-NEXT: lsl x9, x1, x10 +; CHECK-DARWIN-NEXT: lsl x11, x0, x10 +; CHECK-DARWIN-NEXT: tst x10, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x9, x8 +; CHECK-DARWIN-NEXT: csel x1, x11, x8, ne +; CHECK-DARWIN-NEXT: csel x0, xzr, x11, ne +; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 @@ -89,7 +127,23 @@ ; CHECK-NEXT: mov w2, w2 ; CHECK-NEXT: bl __ashrti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: ashr128: +; CHECK-DARWIN: ; %bb.0: ; %entry +; CHECK-DARWIN-NEXT: mov w8, w2 +; CHECK-DARWIN-NEXT: mvn w9, w2 +; CHECK-DARWIN-NEXT: lsl x10, x1, #1 +; CHECK-DARWIN-NEXT: lsl x9, x10, x9 +; CHECK-DARWIN-NEXT: lsr x11, x0, x8 +; CHECK-DARWIN-NEXT: asr x10, x1, #63 +; CHECK-DARWIN-NEXT: asr x12, x1, x8 +; CHECK-DARWIN-NEXT: tst x8, #0x40 +; 
CHECK-DARWIN-NEXT: orr x8, x9, x11 +; CHECK-DARWIN-NEXT: csel x0, x12, x8, ne +; CHECK-DARWIN-NEXT: csel x1, x10, x12, ne +; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 @@ -115,7 +169,22 @@ ; CHECK-NEXT: mov w2, w2 ; CHECK-NEXT: bl __lshrti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret +; +; CHECK-DARWIN-LABEL: lshr128: +; CHECK-DARWIN: ; %bb.0: ; %entry +; CHECK-DARWIN-NEXT: mov w8, w2 +; CHECK-DARWIN-NEXT: mvn w9, w2 +; CHECK-DARWIN-NEXT: lsl x10, x1, #1 +; CHECK-DARWIN-NEXT: lsr x11, x0, x8 +; CHECK-DARWIN-NEXT: lsl x9, x10, x9 +; CHECK-DARWIN-NEXT: lsr x10, x1, x8 +; CHECK-DARWIN-NEXT: tst x8, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x9, x11 +; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne +; CHECK-DARWIN-NEXT: csel x1, xzr, x10, ne +; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -154,6 +154,7 @@ ; CHECK-NEXT: and w0, w19, #0x1 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -32 ; CHECK-NEXT: ret entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)] diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -524,6 +524,7 @@ ; CHECK-NEXT: cmpne p1.b, p1/z, z1.b, #0 ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -228,6 +228,7 @@ ; CHECK-NEXT: st1d { z3.d }, p0, [x0] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %v0 = call @llvm.experimental.vector.insert.v2i64.nxv16i64( undef, <2 x i64> %sv0, i64 0) %v = call @llvm.experimental.vector.insert.v2i64.nxv16i64( %v0, <2 x i64> %sv1, i64 4) @@ -265,6 +266,7 @@ ; CHECK-NEXT: st1d { z1.d }, p0, [x1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv %v = call @llvm.experimental.vector.insert.v2i64.nxv16i64( undef, <2 x i64> %sv, i64 2) diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -36,6 +36,7 @@ ; CHECK-NEXT: ldrb w0, [x8, x9] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: 
.cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i8 %ext @@ -61,6 +62,7 @@ ; CHECK-NEXT: ldrh w0, [x8, x9, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i16 %ext @@ -86,6 +88,7 @@ ; CHECK-NEXT: ldr w0, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i32 %ext @@ -113,6 +116,7 @@ ; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ext = extractelement %a, i32 %idx ret i64 %ext @@ -157,6 +161,7 @@ ; CHECK-NEXT: ldrh w0, [x8, x9, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ext = extractelement %a, i32 128 ret i16 %ext @@ -184,6 +189,7 @@ ; CHECK-NEXT: ldr w0, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ext = extractelement %a, i32 100000 ret i32 %ext @@ -208,6 +214,7 @@ ; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ext = extractelement %a, i32 10 ret i64 %ext diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -36,6 +36,7 @@ ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ins = insertelement %a, i8 %elt, i64 %idx ret %ins @@ -61,6 +62,7 @@ ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ins = insertelement %a, float %elt, i64 %idx ret %ins @@ -90,6 +92,7 @@ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ins = insertelement %a, i64 %elt, i64 %idx ret %ins @@ -153,6 +156,7 @@ ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ins = insertelement %a, i16 %elt, i64 128 ret %ins @@ -180,6 +184,7 @@ ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %ins = insertelement %a, i32 %elt, i64 1000000 ret %ins diff --git a/llvm/test/CodeGen/AArch64/sve-varargs.ll b/llvm/test/CodeGen/AArch64/sve-varargs.ll --- a/llvm/test/CodeGen/AArch64/sve-varargs.ll +++ b/llvm/test/CodeGen/AArch64/sve-varargs.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: add x0, x0, :lo12:.str_1 ; CHECK-NEXT: bl sve_printf ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 ; CHECK-NEXT: ret %f = 
getelementptr [6 x i8], [6 x i8]* @.str_1, i64 0, i64 0 call i32 (i8*, , ...) @sve_printf(i8* %f, %x) diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll --- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -302,28 +302,46 @@ ; CHECK-NEXT: .LBB1_1: // %.Lcontinue ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q15, q14, [sp, #160] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q17, q16, [sp, #128] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q19, q18, [sp, #96] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: add sp, sp, #304 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: // %.Lunwind ; CHECK-NEXT: .Ltmp5: ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q15, q14, [sp, #160] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q17, q16, [sp, #128] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q19, q18, [sp, #96] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload +; CHECK-NEXT: .cfi_adjust_cfa_offset 32 ; CHECK-NEXT: add sp, sp, #304 ; CHECK-NEXT: ret ; @@ -365,32 +383,50 @@ ; GISEL-NEXT: bl may_throw_neon ; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp4: -; GISEL-NEXT: b .LBB1_1 +; GISEL-NEXT: b .LBB1_1 ; GISEL-NEXT: .LBB1_1: // %.Lcontinue ; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q15, q14, [sp, #160] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q17, q16, [sp, #128] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q19, q18, [sp, #96] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q21, 
q20, [sp, #64] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: add sp, sp, #304 ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB1_2: // %.Lunwind ; GISEL-NEXT: .Ltmp5: ; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q13, q12, [sp, #192] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q15, q14, [sp, #160] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q17, q16, [sp, #128] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q19, q18, [sp, #96] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q21, q20, [sp, #64] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: ldp q23, q22, [sp, #32] // 32-byte Folded Reload +; GISEL-NEXT: .cfi_adjust_cfa_offset 32 ; GISEL-NEXT: add sp, sp, #304 ; GISEL-NEXT: ret %result = invoke aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v) to label %.Lcontinue unwind label %.Lunwind
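
Note on the emitted unwind info: the .cfi_adjust_cfa_offset directives added throughout the tests keep the DWARF CFA rule in sync once a callee-save restore is folded into a post-index load that also bumps SP. A minimal sketch of the resulting epilogue shape, assuming a simple non-leaf function that only spills the link register (the "example" and "callee" names are illustrative, not taken from the tests):

    example:                            // hypothetical non-leaf function
            str  x30, [sp, #-16]!       // prologue: save LR, SP -= 16
            .cfi_def_cfa_offset 16      // CFA = SP + 16
            .cfi_offset w30, -16
            bl   callee
            ldr  x30, [sp], #16         // epilogue: restore LR, SP += 16
            .cfi_adjust_cfa_offset -16  // CFA is SP + 0 again
            ret

Without the adjustment, an unwinder that interrupts the function between the restore and the ret (for example on an asynchronous signal) would still compute the CFA as SP + 16, 16 bytes too high, and could recover wrong values for the caller's frame.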