diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -161,10 +161,16 @@
   /// Current MachineFunction.
   MachineFunction *MachineFunc = nullptr;
 
+  /// Is `true` for block numbers where we can guarantee no stack access
+  /// or computation of stack-relative addresses on any CFG path including
+  /// the block itself.
+  BitVector NoStackAddressUsedBlockInfo;
+
   /// Check if \p MI uses or defines a callee-saved register or
   /// a frame index. If this is the case, this means \p MI must happen
   /// after Save and before Restore.
-  bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
+  bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+                       bool NoStackAddressUsed) const;
 
   const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
     if (CurrentCSRs.empty()) {
@@ -190,7 +196,9 @@
 
   // Try to find safe point based on dominance and block frequency without
   // any change in IR.
-  bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS);
+  bool performShrinkWrapping(
+      const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+      RegScavenger *RS);
 
   /// This function tries to split the restore point if doing so can shrink the
   /// save point further. \return True if restore point is split.
@@ -285,8 +293,8 @@
 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
 INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
 
-bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
-                                 RegScavenger *RS) const {
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+                                 bool NoStackAddressUsed) const {
   /// Check if \p Op is known to access an address not on the function's stack.
   /// At the moment, accesses where the underlying object is a global, function
   /// argument, or jump table are considered non-stack accesses. Note that the
@@ -306,12 +314,9 @@
       return PSV->isJumpTable();
     return false;
   };
-  // This prevents premature stack popping when occurs a indirect stack
-  // access. It is overly aggressive for the moment.
-  // TODO:
-  //  - Further, data dependency and alias analysis can validate
-  //    that load and stores never derive from the stack pointer.
-  if (MI.mayLoadOrStore() &&
+  // Load/store operations may access the stack indirectly when we previously
+  // computed an address to a stack location.
+  if (!NoStackAddressUsed && MI.mayLoadOrStore() &&
       (MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() ||
        !all_of(MI.memoperands(), IsKnownNonStackPtr)))
     return true;
@@ -553,7 +558,7 @@
                            SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
                            const TargetInstrInfo *TII, RegScavenger *RS) {
   for (const MachineInstr &MI : *CurRestore)
-    if (useOrDefCSROrFI(MI, RS))
+    if (useOrDefCSROrFI(MI, RS, /*NoStackAddressUsed=*/false))
      return false;
 
   for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
@@ -613,7 +618,7 @@
       continue;
     }
     for (const MachineInstr &MI : MBB)
-      if (useOrDefCSROrFI(MI, RS)) {
+      if (useOrDefCSROrFI(MI, RS, /*NoStackAddressUsed=*/false)) {
        DirtyBBs.insert(&MBB);
        break;
      }
@@ -700,7 +705,7 @@
   // terminator.
   if (Restore == &MBB) {
     for (const MachineInstr &Terminator : MBB.terminators()) {
-      if (!useOrDefCSROrFI(Terminator, RS))
+      if (!useOrDefCSROrFI(Terminator, RS, /*NoStackAddressUsed=*/false))
        continue;
      // One of the terminator needs to happen before the restore point.
      if (MBB.succ_empty()) {
@@ -807,23 +812,24 @@
   return false;
 }
 
-bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
-  for (MachineBasicBlock &MBB : MF) {
-    LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
-                      << MBB.getName() << '\n');
+bool ShrinkWrap::performShrinkWrapping(
+    const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+    RegScavenger *RS) {
+  for (MachineBasicBlock *MBB : RPOT) {
+    LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');
 
-    if (MBB.isEHFuncletEntry())
+    if (MBB->isEHFuncletEntry())
       return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
                                "EH Funclets are not supported yet.",
-                               MBB.front().getDebugLoc(), &MBB);
+                               MBB->front().getDebugLoc(), MBB);
 
-    if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) {
+    if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) {
       // Push the prologue and epilogue outside of the region that may throw (or
       // jump out via inlineasm_br), by making sure that all the landing pads
       // are at least at the boundary of the save and restore points. The
       // problem is that a basic block can jump out from the middle in these
       // cases, which we do not handle.
-      updateSaveRestorePoints(MBB, RS);
+      updateSaveRestorePoints(*MBB, RS);
       if (!ArePointsInteresting()) {
         LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
         return false;
@@ -831,22 +837,37 @@
       continue;
     }
 
-    for (const MachineInstr &MI : MBB) {
-      if (!useOrDefCSROrFI(MI, RS))
-        continue;
-      // Save (resp. restore) point must dominate (resp. post dominate)
-      // MI. Look for the proper basic block for those.
-      updateSaveRestorePoints(MBB, RS);
-      // If we are at a point where we cannot improve the placement of
-      // save/restore instructions, just give up.
-      if (!ArePointsInteresting()) {
-        LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
-        return false;
+    bool NoStackAddressUsed = true;
+    // Check if we found any stack accesses in the predecessors. We are not
+    // doing a full dataflow analysis here to keep things simple but just
+    // rely on a reverse postorder traversal (RPOT) to guarantee predecessors
+    // are already processed except for loops (and accept the conservative
+    // result for loops).
+    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+      if (!NoStackAddressUsedBlockInfo.test(Pred->getNumber())) {
+        NoStackAddressUsed = false;
+        break;
       }
-      // No need to look for other instructions, this basic block
-      // will already be part of the handled region.
-      break;
     }
+
+    for (const MachineInstr &MI : *MBB) {
+      if (useOrDefCSROrFI(MI, RS, NoStackAddressUsed)) {
+        // Save (resp. restore) point must dominate (resp. post dominate)
+        // MI. Look for the proper basic block for those.
+        updateSaveRestorePoints(*MBB, RS);
+        // If we are at a point where we cannot improve the placement of
+        // save/restore instructions, just give up.
+        if (!ArePointsInteresting()) {
+          LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+          return false;
+        }
+        // No need to look for other instructions, this basic block
+        // will already be part of the handled region.
+        NoStackAddressUsed = false;
+        break;
+      }
+    }
+    NoStackAddressUsedBlockInfo[MBB->getNumber()] = NoStackAddressUsed;
   }
   if (!ArePointsInteresting()) {
     // If the points are not interesting at this point, then they must be null
@@ -860,13 +881,13 @@
   LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: "
                     << EntryFreq << '\n');
 
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  const TargetFrameLowering *TFI =
+      MachineFunc->getSubtarget().getFrameLowering();
   do {
     LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
-                      << Save->getNumber() << ' ' << Save->getName() << ' '
+                      << printMBBReference(*Save) << ' '
                       << MBFI->getBlockFreq(Save).getFrequency()
-                      << "\nRestore: " << Restore->getNumber() << ' '
-                      << Restore->getName() << ' '
+                      << "\nRestore: " << printMBBReference(*Restore) << ' '
                       << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
 
     bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
@@ -927,7 +948,9 @@
 
   bool Changed = false;
 
-  bool HasCandidate = performShrinkWrapping(MF, RS.get());
+  NoStackAddressUsedBlockInfo.resize(MF.getNumBlockIDs());
+  bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
+  NoStackAddressUsedBlockInfo.clear();
   Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
   if (!HasCandidate && !Changed)
     return false;
@@ -935,9 +958,8 @@
     return Changed;
 
   LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
-                    << Save->getNumber() << ' ' << Save->getName()
-                    << "\nRestore: " << Restore->getNumber() << ' '
-                    << Restore->getName() << '\n');
+                    << printMBBReference(*Save) << ' '
+                    << "\nRestore: " << printMBBReference(*Restore) << '\n');
 
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MFI.setSavePoint(Save);
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll b/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll
--- a/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll
+++ b/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll
@@ -5,7 +5,6 @@
 define void @split_block_no_fallthrough(i64 %val) #0 {
 ; CHECK-LABEL: split_block_no_fallthrough:
 ; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; CHECK-NEXT: cmn x0, #5
 ; CHECK-NEXT: b.le LBB0_3
 ; CHECK-NEXT: ; %bb.1: ; %b3
@@ -13,15 +12,16 @@
 ; CHECK-NEXT: cbnz w8, LBB0_2
 ; CHECK-NEXT: b LBB0_4
 ; CHECK-NEXT: LBB0_2: ; %common.ret
-; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: LBB0_3: ; %b2
-; CHECK-NEXT: mov w0, #93
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT: mov w0, #93 ; =0x5d
 ; CHECK-NEXT: bl _extfunc
-; CHECK-NEXT: cbnz w0, LBB0_2
-; CHECK-NEXT: LBB0_4: ; %b7
-; CHECK-NEXT: mov w0, #13
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT: cbz w0, LBB0_4
+; CHECK-NEXT: b LBB0_2
+; CHECK-NEXT: LBB0_4: ; %b7
+; CHECK-NEXT: mov w0, #13 ; =0xd
 ; CHECK-NEXT: b _extfunc
 bb:
   %c0 = icmp sgt i64 %val, -5
diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
--- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
+++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
@@ -13,25 +13,20 @@
 define void @func() uwtable {
 ; CHECK-LABEL: func:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: cbnz w8, .LBB0_3
 ; CHECK-NEXT: // %bb.1: // %b1
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_remember_state
 ; CHECK-NEXT: cbz wzr, .LBB0_4
 ; CHECK-NEXT: // %bb.2: // %b3
 ; CHECK-NEXT: ldr w8, [x8]
 ; CHECK-NEXT: and w0, w8, #0x100
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w30
 ; CHECK-NEXT: cbz w0, .LBB0_5
 ; CHECK-NEXT: .LBB0_3: // %common.ret.sink.split
 ; CHECK-NEXT: b extfunc
 ; CHECK-NEXT: .LBB0_4: // %b2
-; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: bl extfunc
 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: .cfi_def_cfa_offset 0
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
@@ -1960,23 +1960,24 @@
 ; ARM-ENABLE-LABEL: debug_info:
 ; ARM-ENABLE: Lfunc_begin12:
 ; ARM-ENABLE-NEXT: @ %bb.0: @ %bb
+; ARM-ENABLE-NEXT: tst r2, #1
+; ARM-ENABLE-NEXT: beq LBB12_2
+; ARM-ENABLE-NEXT: @ %bb.1: @ %bb3
 ; ARM-ENABLE-NEXT: push {r4, r7, lr}
 ; ARM-ENABLE-NEXT: add r7, sp, #4
 ; ARM-ENABLE-NEXT: sub r4, sp, #16
 ; ARM-ENABLE-NEXT: bfc r4, #0, #4
 ; ARM-ENABLE-NEXT: mov sp, r4
-; ARM-ENABLE-NEXT: tst r2, #1
-; ARM-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
-; ARM-ENABLE-NEXT: beq LBB12_2
-; ARM-ENABLE-NEXT: @ %bb.1: @ %bb3
 ; ARM-ENABLE-NEXT: ldr r1, [r7, #8]
+; ARM-ENABLE-NEXT: mov r2, r3
+; ARM-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
 ; ARM-ENABLE-NEXT: vmov s16, r0
 ; ARM-ENABLE-NEXT: mov r0, r3
-; ARM-ENABLE-NEXT: mov r2, r3
 ; ARM-ENABLE-NEXT: vmov d9, r3, r1
 ; ARM-ENABLE-NEXT: mov r3, r1
 ; ARM-ENABLE-NEXT: bl _pow
 ; ARM-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00
+; ARM-ENABLE-NEXT: mov r4, sp
 ; ARM-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00
 ; ARM-ENABLE-NEXT: vadd.f64 d16, d9, d16
 ; ARM-ENABLE-NEXT: vcmp.f32 s16, s0
@@ -1989,17 +1990,17 @@
 ; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr
 ; ARM-ENABLE-NEXT: vmovne.f64 d9, d17
 ; ARM-ENABLE-NEXT: vcvt.f32.f64 s0, d9
-; ARM-ENABLE-NEXT: b LBB12_3
+; ARM-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128]
+; ARM-ENABLE-NEXT: sub sp, r7, #4
+; ARM-ENABLE-NEXT: pop {r4, r7, lr}
+; ARM-ENABLE-NEXT: vmov r0, s0
+; ARM-ENABLE-NEXT: bx lr
 ; ARM-ENABLE-NEXT: LBB12_2:
 ; ARM-ENABLE-NEXT: vldr s0, LCPI12_0
-; ARM-ENABLE-NEXT: LBB12_3: @ %bb13
-; ARM-ENABLE-NEXT: mov r4, sp
-; ARM-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128]
 ; ARM-ENABLE-NEXT: vmov r0, s0
-; ARM-ENABLE-NEXT: sub sp, r7, #4
-; ARM-ENABLE-NEXT: pop {r4, r7, pc}
+; ARM-ENABLE-NEXT: bx lr
 ; ARM-ENABLE-NEXT: .p2align 2
-; ARM-ENABLE-NEXT: @ %bb.4:
+; ARM-ENABLE-NEXT: @ %bb.3:
 ; ARM-ENABLE-NEXT: .data_region
 ; ARM-ENABLE-NEXT: LCPI12_0:
 ; ARM-ENABLE-NEXT: .long 0x00000000 @ float 0
@@ -2058,23 +2059,24 @@
 ; THUMB-ENABLE-LABEL: debug_info:
 ; THUMB-ENABLE: Lfunc_begin12:
 ; THUMB-ENABLE-NEXT: @ %bb.0: @ %bb
+; THUMB-ENABLE-NEXT: lsls r1, r2, #31
+; THUMB-ENABLE-NEXT: beq LBB12_2
+; THUMB-ENABLE-NEXT: @ %bb.1: @ %bb3
 ; THUMB-ENABLE-NEXT: push {r4, r7, lr}
 ; THUMB-ENABLE-NEXT: add r7, sp, #4
 ; THUMB-ENABLE-NEXT: sub.w r4, sp, #16
 ; THUMB-ENABLE-NEXT: bfc r4, #0, #4
 ; THUMB-ENABLE-NEXT: mov sp, r4
-; THUMB-ENABLE-NEXT: lsls r1, r2, #31
-; THUMB-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
-; THUMB-ENABLE-NEXT: beq LBB12_2
-; THUMB-ENABLE-NEXT: @ %bb.1: @ %bb3
 ; THUMB-ENABLE-NEXT: ldr r1, [r7, #8]
+; THUMB-ENABLE-NEXT: mov r2, r3
+; THUMB-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
 ; THUMB-ENABLE-NEXT: vmov s16, r0
 ; THUMB-ENABLE-NEXT: mov r0, r3
-; THUMB-ENABLE-NEXT: mov r2, r3
 ; THUMB-ENABLE-NEXT: vmov d9, r3, r1
 ; THUMB-ENABLE-NEXT: mov r3, r1
 ; THUMB-ENABLE-NEXT: bl _pow
 ; THUMB-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00
+; THUMB-ENABLE-NEXT: mov r4, sp
 ; THUMB-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00
 ; THUMB-ENABLE-NEXT: vmov.f64 d18, d9
 ; THUMB-ENABLE-NEXT: vcmp.f32 s16, s0
@@ -2089,18 +2091,18 @@
 ; THUMB-ENABLE-NEXT: it ne
 ; THUMB-ENABLE-NEXT: vmovne.f64 d9, d17
 ; THUMB-ENABLE-NEXT: vcvt.f32.f64 s0, d9
-; THUMB-ENABLE-NEXT: b LBB12_3
-; THUMB-ENABLE-NEXT: LBB12_2:
-; THUMB-ENABLE-NEXT: vldr s0, LCPI12_0
-; THUMB-ENABLE-NEXT: LBB12_3: @ %bb13
-; THUMB-ENABLE-NEXT: mov r4, sp
 ; THUMB-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128]
 ; THUMB-ENABLE-NEXT: subs r4, r7, #4
-; THUMB-ENABLE-NEXT: vmov r0, s0
 ; THUMB-ENABLE-NEXT: mov sp, r4
-; THUMB-ENABLE-NEXT: pop {r4, r7, pc}
+; THUMB-ENABLE-NEXT: pop.w {r4, r7, lr}
+; THUMB-ENABLE-NEXT: vmov r0, s0
+; THUMB-ENABLE-NEXT: bx lr
+; THUMB-ENABLE-NEXT: LBB12_2:
+; THUMB-ENABLE-NEXT: vldr s0, LCPI12_0
+; THUMB-ENABLE-NEXT: vmov r0, s0
+; THUMB-ENABLE-NEXT: bx lr
 ; THUMB-ENABLE-NEXT: .p2align 2
-; THUMB-ENABLE-NEXT: @ %bb.4:
+; THUMB-ENABLE-NEXT: @ %bb.3:
 ; THUMB-ENABLE-NEXT: .data_region
 ; THUMB-ENABLE-NEXT: LCPI12_0:
 ; THUMB-ENABLE-NEXT: .long 0x00000000 @ float 0
diff --git a/llvm/test/CodeGen/ARM/machine-sink-multidef.ll b/llvm/test/CodeGen/ARM/machine-sink-multidef.ll
--- a/llvm/test/CodeGen/ARM/machine-sink-multidef.ll
+++ b/llvm/test/CodeGen/ARM/machine-sink-multidef.ll
@@ -9,8 +9,6 @@
 define arm_aapcscc void @g() {
 ; CHECK-LABEL: g:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push {r11, lr}
 ; CHECK-NEXT: ldr r0, .LCPI0_0
 ; CHECK-NEXT: mov r2, #0
 ; CHECK-NEXT: ldr r1, .LCPI0_1
@@ -19,9 +17,10 @@
 ; CHECK-NEXT: ldr r0, [r1, r0, lsl #3]!
 ; CHECK-NEXT: moveq r0, #0
 ; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: popne {r11, lr}
 ; CHECK-NEXT: movne pc, lr
 ; CHECK-NEXT: .LBB0_1: @ %if.then5
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
 ; CHECK-NEXT: ldr r1, [r1, #4]
 ; CHECK-NEXT: bl k
 ; CHECK-NEXT: .p2align 2
diff --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll
--- a/llvm/test/CodeGen/ARM/swifterror.ll
+++ b/llvm/test/CodeGen/ARM/swifterror.ll
@@ -275,10 +275,12 @@
 define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-LABEL: foo_if:
 ; CHECK-APPLE: @ %bb.0: @ %entry
-; CHECK-APPLE-NEXT: push {lr}
 ; CHECK-APPLE-NEXT: cmp r0, #0
-; CHECK-APPLE-NEXT: beq LBB3_2
-; CHECK-APPLE-NEXT: @ %bb.1: @ %gen_error
+; CHECK-APPLE-NEXT: vldreq s0, LCPI3_0
+; CHECK-APPLE-NEXT: vmoveq r0, s0
+; CHECK-APPLE-NEXT: bxeq lr
+; CHECK-APPLE-NEXT: LBB3_1: @ %gen_error
+; CHECK-APPLE-NEXT: push {lr}
 ; CHECK-APPLE-NEXT: mov r0, #16
 ; CHECK-APPLE-NEXT: mov r1, #0
 ; CHECK-APPLE-NEXT: bl _malloc
@@ -286,15 +288,11 @@
 ; CHECK-APPLE-NEXT: mov r0, #1
 ; CHECK-APPLE-NEXT: vmov.f32 s0, #1.000000e+00
 ; CHECK-APPLE-NEXT: strb r0, [r8, #8]
-; CHECK-APPLE-NEXT: b LBB3_3
-; CHECK-APPLE-NEXT: LBB3_2:
-; CHECK-APPLE-NEXT: vldr s0, LCPI3_0
-; CHECK-APPLE-NEXT: LBB3_3: @ %common.ret
-; CHECK-APPLE-NEXT: vmov r0, s0
 ; CHECK-APPLE-NEXT: pop {lr}
+; CHECK-APPLE-NEXT: vmov r0, s0
 ; CHECK-APPLE-NEXT: bx lr
 ; CHECK-APPLE-NEXT: .p2align 2
-; CHECK-APPLE-NEXT: @ %bb.4:
+; CHECK-APPLE-NEXT: @ %bb.2:
 ; CHECK-APPLE-NEXT: .data_region
 ; CHECK-APPLE-NEXT: LCPI3_0:
 ; CHECK-APPLE-NEXT: .long 0x00000000 @ float 0
@@ -327,26 +325,25 @@
 ;
 ; CHECK-ANDROID-LABEL: foo_if:
 ; CHECK-ANDROID: @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT: cmp r0, #0
+; CHECK-ANDROID-NEXT: vldreq s0, .LCPI3_0
+; CHECK-ANDROID-NEXT: vmoveq r0, s0
+; CHECK-ANDROID-NEXT: bxeq lr
+; CHECK-ANDROID-NEXT: .LBB3_1: @ %gen_error
 ; CHECK-ANDROID-NEXT: .save {r11, lr}
 ; CHECK-ANDROID-NEXT: push {r11, lr}
-; CHECK-ANDROID-NEXT: cmp r0, #0
-; CHECK-ANDROID-NEXT: beq .LBB3_2
-; CHECK-ANDROID-NEXT: @ %bb.1: @ %gen_error
 ; CHECK-ANDROID-NEXT: mov r0, #16
 ; CHECK-ANDROID-NEXT: mov r1, #0
 ; CHECK-ANDROID-NEXT: bl malloc
-; CHECK-ANDROID-NEXT: vmov.f32 s0, #1.000000e+00
 ; CHECK-ANDROID-NEXT: mov r8, r0
 ; CHECK-ANDROID-NEXT: mov r0, #1
+; CHECK-ANDROID-NEXT: vmov.f32 s0, #1.000000e+00
 ; CHECK-ANDROID-NEXT: strb r0, [r8, #8]
+; CHECK-ANDROID-NEXT: pop {r11, lr}
 ; CHECK-ANDROID-NEXT: vmov r0, s0
-; CHECK-ANDROID-NEXT: pop {r11, pc}
-; CHECK-ANDROID-NEXT: .LBB3_2:
-; CHECK-ANDROID-NEXT: vldr s0, .LCPI3_0
-; CHECK-ANDROID-NEXT: vmov r0, s0
-; CHECK-ANDROID-NEXT: pop {r11, pc}
+; CHECK-ANDROID-NEXT: bx lr
 ; CHECK-ANDROID-NEXT: .p2align 2
-; CHECK-ANDROID-NEXT: @ %bb.3:
+; CHECK-ANDROID-NEXT: @ %bb.2:
 ; CHECK-ANDROID-NEXT: .LCPI3_0:
 ; CHECK-ANDROID-NEXT: .long 0x00000000 @ float 0
diff --git a/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll b/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
--- a/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
+++ b/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
@@ -201,20 +201,9 @@
 define void @_ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjERNS0_17WhitespaceManagerE(ptr nocapture readonly %this, i32 zeroext %LineIndex, i32 zeroext %TailOffset, [2 x i64] %Split.coerce, ptr dereferenceable(1504) %Whitespaces) unnamed_addr #1 align 2 {
 ; CHECK-LABEL: _ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjERNS0_17WhitespaceManagerE:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: .cfi_def_cfa_offset 160
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r28, -32
-; CHECK-NEXT: .cfi_offset r29, -24
-; CHECK-NEXT: .cfi_offset r30, -16
-; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT: stdu 1, -160(1)
-; CHECK-NEXT: std 0, 176(1)
-; CHECK-NEXT: mr 12, 8
 ; CHECK-NEXT: ld 10, 56(3)
 ; CHECK-NEXT: lwz 0, 40(3)
+; CHECK-NEXT: mr 12, 8
 ; CHECK-NEXT: cmpldi 10, 0
 ; CHECK-NEXT: beq 0, .LBB0_2
 ; CHECK-NEXT: # %bb.1: # %if.end.i.i
@@ -226,16 +215,27 @@
 ; CHECK-NEXT: ld 9, 48(3)
 ; CHECK-NEXT: crxor 2, 2, 2
 ; CHECK-NEXT: .LBB0_3: # %_ZNK4llvm9StringRef10startswithES0_.exit
+; CHECK-NEXT: mflr 4
+; CHECK-NEXT: .cfi_def_cfa_offset 160
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r28, -32
+; CHECK-NEXT: .cfi_offset r29, -24
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu 1, -160(1)
+; CHECK-NEXT: std 4, 176(1)
 ; CHECK-NEXT: li 8, 0
 ; CHECK-NEXT: li 11, 1
 ; CHECK-NEXT: add 5, 6, 5
+; CHECK-NEXT: iseleq 30, 11, 8
+; CHECK-NEXT: ld 11, 64(3)
 ; CHECK-NEXT: lbz 29, 20(3)
 ; CHECK-NEXT: lwz 28, 16(3)
+; CHECK-NEXT: add 5, 5, 10
 ; CHECK-NEXT: ld 4, 8(3)
-; CHECK-NEXT: iseleq 30, 11, 8
-; CHECK-NEXT: ld 11, 64(3)
 ; CHECK-NEXT: ld 8, 72(3)
-; CHECK-NEXT: add 5, 5, 10
 ; CHECK-NEXT: sub 3, 0, 30
 ; CHECK-NEXT: clrldi 5, 5, 32
 ; CHECK-NEXT: li 0, 1
@@ -243,8 +243,8 @@
 ; CHECK-NEXT: extsw 30, 3
 ; CHECK-NEXT: mr 3, 12
 ; CHECK-NEXT: mr 7, 11
-; CHECK-NEXT: std 28, 112(1)
 ; CHECK-NEXT: std 0, 104(1)
+; CHECK-NEXT: std 28, 112(1)
 ; CHECK-NEXT: std 29, 96(1)
 ; CHECK-NEXT: std 30, 120(1)
 ; CHECK-NEXT: bl _ZN5clang6format17WhitespaceManager24replaceWhitespaceInTokenERKNS0_11FormatTokenEjjN4llvm9StringRefES6_bjji
diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll
--- a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll
@@ -38,7 +38,7 @@
 ; 32SMALL-ASM: # %bb.0: # %entry
 ; 32SMALL-ASM-NEXT: addi 3, 3, -1
 ; 32SMALL-ASM-NEXT: cmplwi 3, 3
-; 32SMALL-ASM-NEXT: bgt 0, L..BB0_6
+; 32SMALL-ASM-NEXT: bgt 0, L..BB0_3
 ; 32SMALL-ASM-NEXT: # %bb.1: # %entry
 ; 32SMALL-ASM-NEXT: lwz 4, L..C0(2) # %jump-table.0
 ; 32SMALL-ASM-NEXT: slwi 3, 3, 2
@@ -47,32 +47,32 @@
 ; 32SMALL-ASM-NEXT: mtctr 3
 ; 32SMALL-ASM-NEXT: bctr
 ; 32SMALL-ASM-NEXT: L..BB0_2: # %sw.bb
-; 32SMALL-ASM-NEXT: li 3, 0
 ; 32SMALL-ASM-NEXT: #APP
 ; 32SMALL-ASM-NEXT: #NO_APP
+; 32SMALL-ASM-NEXT: L..BB0_3: # %sw.epilog
+; 32SMALL-ASM-NEXT: li 3, 0
 ; 32SMALL-ASM-NEXT: blr
-; 32SMALL-ASM-NEXT: L..BB0_3: # %sw.bb1
+; 32SMALL-ASM-NEXT: L..BB0_4: # %sw.bb1
 ; 32SMALL-ASM-NEXT: li 3, 0
 ; 32SMALL-ASM-NEXT: #APP
 ; 32SMALL-ASM-NEXT: #NO_APP
 ; 32SMALL-ASM-NEXT: blr
-; 32SMALL-ASM-NEXT: L..BB0_4: # %sw.bb2
+; 32SMALL-ASM-NEXT: L..BB0_5: # %sw.bb2
 ; 32SMALL-ASM-NEXT: li 3, 0
 ; 32SMALL-ASM-NEXT: #APP
 ; 32SMALL-ASM-NEXT: #NO_APP
 ; 32SMALL-ASM-NEXT: blr
-; 32SMALL-ASM-NEXT: L..BB0_5: # %sw.bb3
+; 32SMALL-ASM-NEXT: L..BB0_6: # %sw.bb3
+; 32SMALL-ASM-NEXT: li 3, 0
 ; 32SMALL-ASM-NEXT: #APP
 ; 32SMALL-ASM-NEXT: #NO_APP
-; 32SMALL-ASM-NEXT: L..BB0_6: # %sw.epilog
-; 32SMALL-ASM-NEXT: li 3, 0
 ; 32SMALL-ASM-NEXT: blr
 ;
 ; 32LARGE-ASM-LABEL: jump_table:
 ; 32LARGE-ASM: # %bb.0: # %entry
 ; 32LARGE-ASM-NEXT: addi 3, 3, -1
 ; 32LARGE-ASM-NEXT: cmplwi 3, 3
-; 32LARGE-ASM-NEXT: bgt 0, L..BB0_6
+; 32LARGE-ASM-NEXT: bgt 0, L..BB0_3
 ; 32LARGE-ASM-NEXT: # %bb.1: # %entry
 ; 32LARGE-ASM-NEXT: addis 4, L..C0@u(2)
 ; 32LARGE-ASM-NEXT: slwi 3, 3, 2
@@ -82,32 +82,32 @@
 ; 32LARGE-ASM-NEXT: mtctr 3
 ; 32LARGE-ASM-NEXT: bctr
 ; 32LARGE-ASM-NEXT: L..BB0_2: # %sw.bb
-; 32LARGE-ASM-NEXT: li 3, 0
 ; 32LARGE-ASM-NEXT: #APP
 ; 32LARGE-ASM-NEXT: #NO_APP
+; 32LARGE-ASM-NEXT: L..BB0_3: # %sw.epilog
+; 32LARGE-ASM-NEXT: li 3, 0
 ; 32LARGE-ASM-NEXT: blr
-; 32LARGE-ASM-NEXT: L..BB0_3: # %sw.bb1
+; 32LARGE-ASM-NEXT: L..BB0_4: # %sw.bb1
 ; 32LARGE-ASM-NEXT: li 3, 0
 ; 32LARGE-ASM-NEXT: #APP
 ; 32LARGE-ASM-NEXT: #NO_APP
 ; 32LARGE-ASM-NEXT: blr
-; 32LARGE-ASM-NEXT: L..BB0_4: # %sw.bb2
+; 32LARGE-ASM-NEXT: L..BB0_5: # %sw.bb2
 ; 32LARGE-ASM-NEXT: li 3, 0
 ; 32LARGE-ASM-NEXT: #APP
 ; 32LARGE-ASM-NEXT: #NO_APP
 ; 32LARGE-ASM-NEXT: blr
-; 32LARGE-ASM-NEXT: L..BB0_5: # %sw.bb3
+; 32LARGE-ASM-NEXT: L..BB0_6: # %sw.bb3
+; 32LARGE-ASM-NEXT: li 3, 0
 ; 32LARGE-ASM-NEXT: #APP
 ; 32LARGE-ASM-NEXT: #NO_APP
-; 32LARGE-ASM-NEXT: L..BB0_6: # %sw.epilog
-; 32LARGE-ASM-NEXT: li 3, 0
 ; 32LARGE-ASM-NEXT: blr
 ;
 ; 64SMALL-ASM-LABEL: jump_table:
 ; 64SMALL-ASM: # %bb.0: # %entry
 ; 64SMALL-ASM-NEXT: addi 3, 3, -1
 ; 64SMALL-ASM-NEXT: cmplwi 3, 3
-; 64SMALL-ASM-NEXT: bgt 0, L..BB0_6
+; 64SMALL-ASM-NEXT: bgt 0, L..BB0_3
 ; 64SMALL-ASM-NEXT: # %bb.1: # %entry
 ; 64SMALL-ASM-NEXT: ld 4, L..C0(2) # %jump-table.0
 ; 64SMALL-ASM-NEXT: rldic 3, 3, 2, 30
@@ -116,32 +116,32 @@
 ; 64SMALL-ASM-NEXT: mtctr 3
 ; 64SMALL-ASM-NEXT: bctr
 ; 64SMALL-ASM-NEXT: L..BB0_2: # %sw.bb
-; 64SMALL-ASM-NEXT: li 3, 0
 ; 64SMALL-ASM-NEXT: #APP
 ; 64SMALL-ASM-NEXT: #NO_APP
+; 64SMALL-ASM-NEXT: L..BB0_3: # %sw.epilog
+; 64SMALL-ASM-NEXT: li 3, 0
 ; 64SMALL-ASM-NEXT: blr
-; 64SMALL-ASM-NEXT: L..BB0_3: # %sw.bb1
+; 64SMALL-ASM-NEXT: L..BB0_4: # %sw.bb1
 ; 64SMALL-ASM-NEXT: li 3, 0
 ; 64SMALL-ASM-NEXT: #APP
 ; 64SMALL-ASM-NEXT: #NO_APP
 ; 64SMALL-ASM-NEXT: blr
-; 64SMALL-ASM-NEXT: L..BB0_4: # %sw.bb2
+; 64SMALL-ASM-NEXT: L..BB0_5: # %sw.bb2
 ; 64SMALL-ASM-NEXT: li 3, 0
 ; 64SMALL-ASM-NEXT: #APP
 ; 64SMALL-ASM-NEXT: #NO_APP
 ; 64SMALL-ASM-NEXT: blr
-; 64SMALL-ASM-NEXT: L..BB0_5: # %sw.bb3
+; 64SMALL-ASM-NEXT: L..BB0_6: # %sw.bb3
+; 64SMALL-ASM-NEXT: li 3, 0
 ; 64SMALL-ASM-NEXT: #APP
 ; 64SMALL-ASM-NEXT: #NO_APP
-; 64SMALL-ASM-NEXT: L..BB0_6: # %sw.epilog
-; 64SMALL-ASM-NEXT: li 3, 0
 ; 64SMALL-ASM-NEXT: blr
 ;
 ; 64LARGE-ASM-LABEL: jump_table:
 ; 64LARGE-ASM: # %bb.0: # %entry
 ; 64LARGE-ASM-NEXT: addi 3, 3, -1
 ; 64LARGE-ASM-NEXT: cmplwi 3, 3
-; 64LARGE-ASM-NEXT: bgt 0, L..BB0_6
+; 64LARGE-ASM-NEXT: bgt 0, L..BB0_3
 ; 64LARGE-ASM-NEXT: # %bb.1: # %entry
 ; 64LARGE-ASM-NEXT: addis 4, L..C0@u(2)
 ; 64LARGE-ASM-NEXT: rldic 3, 3, 2, 30
@@ -151,25 +151,25 @@
 ; 64LARGE-ASM-NEXT: mtctr 3
 ; 64LARGE-ASM-NEXT: bctr
 ; 64LARGE-ASM-NEXT: L..BB0_2: # %sw.bb
-; 64LARGE-ASM-NEXT: li 3, 0
 ; 64LARGE-ASM-NEXT: #APP
 ; 64LARGE-ASM-NEXT: #NO_APP
+; 64LARGE-ASM-NEXT: L..BB0_3: # %sw.epilog
+; 64LARGE-ASM-NEXT: li 3, 0
 ; 64LARGE-ASM-NEXT: blr
-; 64LARGE-ASM-NEXT: L..BB0_3: # %sw.bb1
+; 64LARGE-ASM-NEXT: L..BB0_4: # %sw.bb1
 ; 64LARGE-ASM-NEXT: li 3, 0
 ; 64LARGE-ASM-NEXT: #APP
 ; 64LARGE-ASM-NEXT: #NO_APP
 ; 64LARGE-ASM-NEXT: blr
-; 64LARGE-ASM-NEXT: L..BB0_4: # %sw.bb2
+; 64LARGE-ASM-NEXT: L..BB0_5: # %sw.bb2
 ; 64LARGE-ASM-NEXT: li 3, 0
 ; 64LARGE-ASM-NEXT: #APP
 ; 64LARGE-ASM-NEXT: #NO_APP
 ; 64LARGE-ASM-NEXT: blr
-; 64LARGE-ASM-NEXT: L..BB0_5: # %sw.bb3
+; 64LARGE-ASM-NEXT: L..BB0_6: # %sw.bb3
+; 64LARGE-ASM-NEXT: li 3, 0
 ; 64LARGE-ASM-NEXT: #APP
 ; 64LARGE-ASM-NEXT: #NO_APP
-; 64LARGE-ASM-NEXT: L..BB0_6: # %sw.epilog
-; 64LARGE-ASM-NEXT: li 3, 0
 ; 64LARGE-ASM-NEXT: blr
 entry:
   switch i32 %a, label %sw.epilog [
diff --git a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
--- a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
+++ b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
@@ -67,12 +67,6 @@
 define signext i32 @test(ptr nocapture %FP) local_unnamed_addr #0 {
 ; CHECKLX-LABEL: test:
 ; CHECKLX: # %bb.0: # %entry
-; CHECKLX-NEXT: mflr 0
-; CHECKLX-NEXT: stdu 1, -32(1)
-; CHECKLX-NEXT: std 2, 24(1)
-; CHECKLX-NEXT: std 0, 48(1)
-; CHECKLX-NEXT: .cfi_def_cfa_offset 32
-; CHECKLX-NEXT: .cfi_offset lr, 16
 ; CHECKLX-NEXT: addis 4, 2, .LC0@toc@ha
 ; CHECKLX-NEXT: addis 5, 2, .LC1@toc@ha
 ; CHECKLX-NEXT: mr 12, 3
@@ -94,6 +88,12 @@
 ; CHECKLX-NEXT: lwz 6, 0(4)
 ; CHECKLX-NEXT: ble 0, .LBB0_1
 ; CHECKLX-NEXT: .LBB0_2: # %if.then
+; CHECKLX-NEXT: mflr 0
+; CHECKLX-NEXT: stdu 1, -32(1)
+; CHECKLX-NEXT: std 2, 24(1)
+; CHECKLX-NEXT: std 0, 48(1)
+; CHECKLX-NEXT: .cfi_def_cfa_offset 32
+; CHECKLX-NEXT: .cfi_offset lr, 16
 ; CHECKLX-NEXT: extsw 3, 6
 ; CHECKLX-NEXT: mtctr 12
 ; CHECKLX-NEXT: bctrl
@@ -105,9 +105,6 @@
 ;
 ; CHECKAIX-LABEL: test:
 ; CHECKAIX: # %bb.0: # %entry
-; CHECKAIX-NEXT: mflr 0
-; CHECKAIX-NEXT: stdu 1, -112(1)
-; CHECKAIX-NEXT: std 0, 128(1)
 ; CHECKAIX-NEXT: ld 5, L..C0(2) # @ga
 ; CHECKAIX-NEXT: ld 6, L..C1(2) # @gb
 ; CHECKAIX-NEXT: L..BB0_1: # %if.end
@@ -123,7 +120,10 @@
 ; CHECKAIX-NEXT: stw 4, 0(5)
 ; CHECKAIX-NEXT: b L..BB0_1
 ; CHECKAIX-NEXT: L..BB0_3: # %if.then
+; CHECKAIX-NEXT: mflr 0
+; CHECKAIX-NEXT: stdu 1, -112(1)
 ; CHECKAIX-NEXT: ld 5, 0(3)
+; CHECKAIX-NEXT: std 0, 128(1)
 ; CHECKAIX-NEXT: ld 11, 16(3)
 ; CHECKAIX-NEXT: std 2, 40(1)
 ; CHECKAIX-NEXT: ld 2, 8(3)
@@ -138,9 +138,6 @@
 ;
 ; CHECKAIX32-LABEL: test:
 ; CHECKAIX32: # %bb.0: # %entry
-; CHECKAIX32-NEXT: mflr 0
-; CHECKAIX32-NEXT: stwu 1, -64(1)
-; CHECKAIX32-NEXT: stw 0, 72(1)
 ; CHECKAIX32-NEXT: lwz 5, L..C0(2) # @ga
 ; CHECKAIX32-NEXT: lwz 6, L..C1(2) # @gb
 ; CHECKAIX32-NEXT: L..BB0_1: # %if.end
@@ -156,10 +153,13 @@
 ; CHECKAIX32-NEXT: stw 4, 0(5)
 ; CHECKAIX32-NEXT: b L..BB0_1
 ; CHECKAIX32-NEXT: L..BB0_3: # %if.then
+; CHECKAIX32-NEXT: mflr 0
+; CHECKAIX32-NEXT: stwu 1, -64(1)
 ; CHECKAIX32-NEXT: lwz 5, 0(3)
+; CHECKAIX32-NEXT: stw 0, 72(1)
 ; CHECKAIX32-NEXT: stw 2, 20(1)
-; CHECKAIX32-NEXT: lwz 11, 8(3)
 ; CHECKAIX32-NEXT: mtctr 5
+; CHECKAIX32-NEXT: lwz 11, 8(3)
 ; CHECKAIX32-NEXT: lwz 2, 4(3)
 ; CHECKAIX32-NEXT: mr 3, 4
 ; CHECKAIX32-NEXT: bctrl
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
--- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
@@ -24,12 +24,10 @@
 define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
 ; CHECK-LABEL: P10_Spill_CR_EQ:
 ; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: mfcr r12
-; CHECK-NEXT: stw r12, 8(r1)
 ; CHECK-NEXT: ld r3, 0(r3)
 ; CHECK-NEXT: ld r4, 0(0)
-; CHECK-NEXT: ld r5, 56(0)
 ; CHECK-NEXT: cmpdi r3, 0
+; CHECK-NEXT: ld r5, 56(0)
 ; CHECK-NEXT: cmpdi cr1, r4, 0
 ; CHECK-NEXT: cmpdi cr5, r5, 0
 ; CHECK-NEXT: cmpldi cr6, r3, 0
@@ -55,13 +53,15 @@
 ; CHECK-NEXT: .LBB0_7:
 ; CHECK-NEXT: # implicit-def: $r4
 ; CHECK-NEXT: .LBB0_8: # %bb20
+; CHECK-NEXT: mfcr r12
 ; CHECK-NEXT: cmpwi cr2, r3, -1
 ; CHECK-NEXT: cmpwi cr3, r4, -1
+; CHECK-NEXT: stw r12, 8(r1)
 ; CHECK-NEXT: cmpwi cr7, r3, 0
 ; CHECK-NEXT: cmpwi cr6, r4, 0
-; CHECK-NEXT: # implicit-def: $x3
 ; CHECK-NEXT: crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt
 ; CHECK-NEXT: crand 4*cr5+lt, 4*cr3+gt, 4*cr5+un
+; CHECK-NEXT: # implicit-def: $x3
 ; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10
 ; CHECK-NEXT: # %bb.9: # %bb34
 ; CHECK-NEXT: ld r3, 0(r3)
diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
--- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
@@ -7,10 +7,8 @@
 define void @print_res() nounwind {
 ; CHECK-LABEL: print_res:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: stdu 1, -128(1)
-; CHECK-NEXT: std 0, 144(1)
 ; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: mflr 0
 ; CHECK-NEXT: addi 3, 3, -1
 ; CHECK-NEXT: clrldi 4, 3, 32
 ; CHECK-NEXT: cmplwi 3, 3
@@ -20,11 +18,13 @@
 ; CHECK-NEXT: cmpldi 3, 1
 ; CHECK-NEXT: iselgt 3, 3, 4
 ; CHECK-NEXT: li 4, 0
-; CHECK-NEXT: li 5, 0
 ; CHECK-NEXT: mtctr 3
+; CHECK-NEXT: stdu 1, -128(1)
+; CHECK-NEXT: li 5, 0
+; CHECK-NEXT: std 0, 144(1)
+; CHECK-NEXT: li 3, 1
 ; CHECK-NEXT: li 7, -1
 ; CHECK-NEXT: lbz 5, 0(5)
-; CHECK-NEXT: li 3, 1
 ; CHECK-NEXT: bdz .LBB0_6
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: xori 6, 5, 84
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
--- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -95,10 +95,6 @@
 define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_floor_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -109,6 +105,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rdn
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB1_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: lui a0, 913408
 ; RV32IZFH-NEXT: fmv.w.x fa5, a0
@@ -153,11 +153,6 @@
 ;
 ; RV32IZHINX-LABEL: test_floor_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI1_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI1_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -168,6 +163,11 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rdn
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB1_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: lui a0, 913408
 ; RV32IZHINX-NEXT: fle.s s1, a0, s0
@@ -189,16 +189,16 @@
 ; RV32IZHINX-NEXT: mv a3, a1
 ; RV32IZHINX-NEXT: .LBB1_4:
 ; RV32IZHINX-NEXT: and a0, a2, a0
-; RV32IZHINX-NEXT: beqz a4, .LBB1_6
-; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
-; RV32IZHINX-NEXT: .LBB1_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: beqz a4, .LBB1_6
+; RV32IZHINX-NEXT: # %bb.5:
+; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: .LBB1_6:
+; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: ret
 ;
 ; RV64IZHINX-LABEL: test_floor_si64:
@@ -510,10 +510,6 @@
 define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_floor_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -524,6 +520,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rdn
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB3_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: fmv.w.x fa5, zero
 ; RV32IZFH-NEXT: fle.s a0, fa5, fs0
@@ -555,10 +555,6 @@
 ;
 ; RV32IZHINX-LABEL: test_floor_ui64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI3_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI3_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -569,6 +565,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rdn
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB3_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: fle.s a0, zero, s0
 ; RV32IZHINX-NEXT: neg s1, a0
@@ -807,10 +807,6 @@
 define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_ceil_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -821,6 +817,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rup
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB5_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: lui a0, 913408
 ; RV32IZFH-NEXT: fmv.w.x fa5, a0
@@ -865,11 +865,6 @@
 ;
 ; RV32IZHINX-LABEL: test_ceil_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI5_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI5_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -880,6 +875,11 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rup
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB5_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: lui a0, 913408
 ; RV32IZHINX-NEXT: fle.s s1, a0, s0
@@ -901,16 +901,16 @@
 ; RV32IZHINX-NEXT: mv a3, a1
 ; RV32IZHINX-NEXT: .LBB5_4:
 ; RV32IZHINX-NEXT: and a0, a2, a0
-; RV32IZHINX-NEXT: beqz a4, .LBB5_6
-; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
-; RV32IZHINX-NEXT: .LBB5_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: beqz a4, .LBB5_6
+; RV32IZHINX-NEXT: # %bb.5:
+; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: .LBB5_6:
+; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: ret
 ;
 ; RV64IZHINX-LABEL: test_ceil_si64:
@@ -1222,10 +1222,6 @@
 define i64 @test_ceil_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_ceil_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -1236,6 +1232,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rup
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB7_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: fmv.w.x fa5, zero
 ; RV32IZFH-NEXT: fle.s a0, fa5, fs0
@@ -1267,10 +1267,6 @@
 ;
 ; RV32IZHINX-LABEL: test_ceil_ui64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI7_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI7_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -1281,6 +1277,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rup
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB7_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: fle.s a0, zero, s0
 ; RV32IZHINX-NEXT: neg s1, a0
@@ -1519,10 +1519,6 @@
 define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_trunc_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -1533,6 +1529,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rtz
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB9_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: lui a0, 913408
 ; RV32IZFH-NEXT: fmv.w.x fa5, a0
@@ -1577,11 +1577,6 @@
 ;
 ; RV32IZHINX-LABEL: test_trunc_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI9_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI9_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -1592,6 +1587,11 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rtz
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB9_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: lui a0, 913408
 ; RV32IZHINX-NEXT: fle.s s1, a0, s0
@@ -1613,16 +1613,16 @@
 ; RV32IZHINX-NEXT: mv a3, a1
 ; RV32IZHINX-NEXT: .LBB9_4:
 ; RV32IZHINX-NEXT: and a0, a2, a0
-; RV32IZHINX-NEXT: beqz a4, .LBB9_6
-; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
-; RV32IZHINX-NEXT: .LBB9_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: beqz a4, .LBB9_6
+; RV32IZHINX-NEXT: # %bb.5:
+; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: .LBB9_6:
+; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: ret
 ;
 ; RV64IZHINX-LABEL: test_trunc_si64:
@@ -1934,10 +1934,6 @@
 define i64 @test_trunc_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_trunc_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -1948,6 +1944,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rtz
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB11_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: fmv.w.x fa5, zero
 ; RV32IZFH-NEXT: fle.s a0, fa5, fs0
@@ -1979,10 +1979,6 @@
 ;
 ; RV32IZHINX-LABEL: test_trunc_ui64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI11_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI11_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -1993,6 +1989,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rtz
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB11_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: fle.s a0, zero, s0
 ; RV32IZHINX-NEXT: neg s1, a0
@@ -2231,10 +2231,6 @@
 define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_round_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI13_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -2245,6 +2241,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rmm
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB13_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: lui a0, 913408
 ; RV32IZFH-NEXT: fmv.w.x fa5, a0
@@ -2289,11 +2289,6 @@
 ;
 ; RV32IZHINX-LABEL: test_round_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI13_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI13_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -2304,6 +2299,11 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rmm
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB13_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: lui a0, 913408
 ; RV32IZHINX-NEXT: fle.s s1, a0, s0
@@ -2325,16 +2325,16 @@
 ; RV32IZHINX-NEXT: mv a3, a1
 ; RV32IZHINX-NEXT: .LBB13_4:
 ; RV32IZHINX-NEXT: and a0, a2, a0
-; RV32IZHINX-NEXT: beqz a4, .LBB13_6
-; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
-; RV32IZHINX-NEXT: .LBB13_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: beqz a4, .LBB13_6
+; RV32IZHINX-NEXT: # %bb.5:
+; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: .LBB13_6:
+; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: ret
 ;
 ; RV64IZHINX-LABEL: test_round_si64:
@@ -2646,10 +2646,6 @@
 define i64 @test_round_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_round_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -2660,6 +2656,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rmm
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB15_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: fmv.w.x fa5, zero
 ; RV32IZFH-NEXT: fle.s a0, fa5, fs0
@@ -2691,10 +2691,6 @@
 ;
 ; RV32IZHINX-LABEL: test_round_ui64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI15_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI15_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -2705,6 +2701,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rmm
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB15_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: fle.s a0, zero, s0
 ; RV32IZHINX-NEXT: neg s1, a0
@@ -2943,10 +2943,6 @@
 define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_roundeven_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -2957,6 +2953,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rne
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB17_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: lui a0, 913408
 ; RV32IZFH-NEXT: fmv.w.x fa5, a0
@@ -3001,11 +3001,6 @@
 ;
 ; RV32IZHINX-LABEL: test_roundeven_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI17_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI17_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -3016,6 +3011,11 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rne
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB17_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: lui a0, 913408
 ; RV32IZHINX-NEXT: fle.s s1, a0, s0
@@ -3037,16 +3037,16 @@
 ; RV32IZHINX-NEXT: mv a3, a1
 ; RV32IZHINX-NEXT: .LBB17_4:
 ; RV32IZHINX-NEXT: and a0, a2, a0
-; RV32IZHINX-NEXT: beqz a4, .LBB17_6
-; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
-; RV32IZHINX-NEXT: .LBB17_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: beqz a4, .LBB17_6
+; RV32IZHINX-NEXT: # %bb.5:
+; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: .LBB17_6:
+; RV32IZHINX-NEXT: and a1, a2, a3
 ; RV32IZHINX-NEXT: ret
 ;
 ; RV64IZHINX-LABEL: test_roundeven_si64:
@@ -3358,10 +3358,6 @@
 define i64 @test_roundeven_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_roundeven_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -3372,6 +3368,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rne
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB19_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: fmv.w.x fa5, zero
 ; RV32IZFH-NEXT: fle.s a0, fa5, fs0
@@ -3403,10 +3403,6 @@
 ;
 ; RV32IZHINX-LABEL: test_roundeven_ui64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI19_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI19_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -3417,6 +3413,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rne
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB19_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT: fle.s a0, zero, s0
 ; RV32IZHINX-NEXT: neg s1, a0
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv.ll b/llvm/test/CodeGen/RISCV/half-round-conv.ll
--- a/llvm/test/CodeGen/RISCV/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv.ll
@@ -309,10 +309,6 @@
 define i64 @test_floor_si64(half %x) {
 ; RV32IZFH-LABEL: test_floor_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -323,6 +319,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rdn
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB3_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: call __fixhfdi@plt
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
@@ -335,10 +335,6 @@
 ;
 ; RV32IZHINX-LABEL: test_floor_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI3_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI3_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -349,6 +345,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rdn
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB3_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: call __fixhfdi@plt
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
@@ -746,10 +746,6 @@
 define i64 @test_floor_ui64(half %x) {
 ; RV32IZFH-LABEL: test_floor_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -760,6 +756,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rdn
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB7_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: call __fixunshfdi@plt
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
@@ -772,10 +772,6 @@
 ;
 ; RV32IZHINX-LABEL: test_floor_ui64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI7_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI7_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -786,6 +782,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rdn
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB7_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: call __fixunshfdi@plt
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
@@ -1183,10 +1183,6 @@
 define i64 @test_ceil_si64(half %x) {
 ; RV32IZFH-LABEL: test_ceil_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -1197,6 +1193,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rup
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB11_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: call __fixhfdi@plt
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
@@ -1209,10 +1209,6 @@
 ;
 ; RV32IZHINX-LABEL: test_ceil_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI11_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI11_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -1223,6 +1219,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rup
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB11_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: call __fixhfdi@plt
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
@@ -1620,10 +1620,6 @@
 define i64 @test_ceil_ui64(half %x) {
 ; RV32IZFH-LABEL: test_ceil_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -1634,6 +1630,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rup
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB15_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: call __fixunshfdi@plt
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
@@ -1646,10 +1646,6 @@
 ;
 ; RV32IZHINX-LABEL: test_ceil_ui64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI15_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI15_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -1660,6 +1656,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rup
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB15_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: call __fixunshfdi@plt
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
@@ -2057,10 +2057,6 @@
 define i64 @test_trunc_si64(half %x) {
 ; RV32IZFH-LABEL: test_trunc_si64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -2071,6 +2067,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rtz
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB19_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: call __fixhfdi@plt
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
@@ -2083,10 +2083,6 @@
 ;
 ; RV32IZHINX-LABEL: test_trunc_si64:
 ; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: addi sp, sp, -16
-; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
-; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI19_0)
 ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI19_0)(a1)
 ; RV32IZHINX-NEXT: fabs.h a2, a0
@@ -2097,6 +2093,10 @@
 ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rtz
 ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0
 ; RV32IZHINX-NEXT: .LBB19_2:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: .cfi_offset ra, -4
 ; RV32IZHINX-NEXT: call __fixhfdi@plt
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
@@ -2494,10 +2494,6 @@
 define i64 @test_trunc_ui64(half %x) {
 ; RV32IZFH-LABEL: test_trunc_ui64:
 ; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI23_0)
 ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0)
 ; RV32IZFH-NEXT: fabs.h fa4, fa0
@@ -2508,6 +2504,10 @@
 ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rtz
 ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0
 ; RV32IZFH-NEXT: .LBB23_2:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: .cfi_offset ra, -4
 ; RV32IZFH-NEXT: call __fixunshfdi@plt
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
@@ -2520,10 +2520,6 @@
 ;
 ; RV32IZHINX-LABEL: test_trunc_ui64:
 ; RV32IZHINX: # %bb.0:
-;
RV32IZHINX-NEXT: addi sp, sp, -16 -; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 -; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI23_0) ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI23_0)(a1) ; RV32IZHINX-NEXT: fabs.h a2, a0 @@ -2534,6 +2530,10 @@ ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rtz ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 ; RV32IZHINX-NEXT: .LBB23_2: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: call __fixunshfdi@plt ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 @@ -2931,10 +2931,6 @@ define i64 @test_round_si64(half %x) { ; RV32IZFH-LABEL: test_round_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI27_0) ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI27_0)(a0) ; RV32IZFH-NEXT: fabs.h fa4, fa0 @@ -2945,6 +2941,10 @@ ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rmm ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0 ; RV32IZFH-NEXT: .LBB27_2: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: call __fixhfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -2957,10 +2957,6 @@ ; ; RV32IZHINX-LABEL: test_round_si64: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: addi sp, sp, -16 -; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 -; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI27_0) ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI27_0)(a1) ; RV32IZHINX-NEXT: fabs.h a2, a0 @@ -2971,6 +2967,10 @@ ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rmm ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 ; RV32IZHINX-NEXT: .LBB27_2: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: call __fixhfdi@plt ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 @@ -3368,10 +3368,6 @@ define i64 @test_round_ui64(half %x) { ; RV32IZFH-LABEL: test_round_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI31_0) ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; RV32IZFH-NEXT: fabs.h fa4, fa0 @@ -3382,6 +3378,10 @@ ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rmm ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0 ; RV32IZFH-NEXT: .LBB31_2: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: call __fixunshfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -3394,10 +3394,6 @@ ; ; RV32IZHINX-LABEL: test_round_ui64: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: addi sp, sp, -16 -; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 -; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI31_0) ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI31_0)(a1) ; RV32IZHINX-NEXT: 
fabs.h a2, a0 @@ -3408,6 +3404,10 @@ ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rmm ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 ; RV32IZHINX-NEXT: .LBB31_2: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: call __fixunshfdi@plt ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 @@ -3805,10 +3805,6 @@ define i64 @test_roundeven_si64(half %x) { ; RV32IZFH-LABEL: test_roundeven_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI35_0) ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI35_0)(a0) ; RV32IZFH-NEXT: fabs.h fa4, fa0 @@ -3819,6 +3815,10 @@ ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rne ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0 ; RV32IZFH-NEXT: .LBB35_2: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: call __fixhfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -3831,10 +3831,6 @@ ; ; RV32IZHINX-LABEL: test_roundeven_si64: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: addi sp, sp, -16 -; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 -; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI35_0) ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI35_0)(a1) ; RV32IZHINX-NEXT: fabs.h a2, a0 @@ -3845,6 +3841,10 @@ ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rne ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 ; RV32IZHINX-NEXT: .LBB35_2: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: call __fixhfdi@plt ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 @@ -4242,10 +4242,6 @@ define i64 @test_roundeven_ui64(half %x) { ; RV32IZFH-LABEL: test_roundeven_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI39_0) ; RV32IZFH-NEXT: flh fa5, %lo(.LCPI39_0)(a0) ; RV32IZFH-NEXT: fabs.h fa4, fa0 @@ -4256,6 +4252,10 @@ ; RV32IZFH-NEXT: fcvt.h.w fa5, a0, rne ; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0 ; RV32IZFH-NEXT: .LBB39_2: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: .cfi_offset ra, -4 ; RV32IZFH-NEXT: call __fixunshfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -4268,10 +4268,6 @@ ; ; RV32IZHINX-LABEL: test_roundeven_ui64: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: addi sp, sp, -16 -; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 -; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: lui a1, %hi(.LCPI39_0) ; RV32IZHINX-NEXT: lh a1, %lo(.LCPI39_0)(a1) ; RV32IZHINX-NEXT: fabs.h a2, a0 @@ -4282,6 +4278,10 @@ ; RV32IZHINX-NEXT: fcvt.h.w a1, a1, rne ; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 ; RV32IZHINX-NEXT: .LBB39_2: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: .cfi_def_cfa_offset 16 +; RV32IZHINX-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: .cfi_offset ra, -4 ; RV32IZHINX-NEXT: call __fixunshfdi@plt ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -3984,63 +3984,47 @@ ; ; RV32ZVE32F-LABEL: mgather_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a2, t0, 1 -; RV32ZVE32F-NEXT: beqz a2, .LBB47_9 +; RV32ZVE32F-NEXT: beqz a2, .LBB47_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a2, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB47_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB47_8 ; RV32ZVE32F-NEXT: .LBB47_2: ; RV32ZVE32F-NEXT: lw a4, 12(a1) ; RV32ZVE32F-NEXT: lw a5, 8(a1) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB47_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB47_9 ; RV32ZVE32F-NEXT: .LBB47_3: ; RV32ZVE32F-NEXT: lw a6, 20(a1) ; RV32ZVE32F-NEXT: lw a7, 16(a1) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, .LBB47_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB47_10 ; RV32ZVE32F-NEXT: .LBB47_4: ; RV32ZVE32F-NEXT: lw t1, 28(a1) ; RV32ZVE32F-NEXT: lw t2, 24(a1) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB47_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB47_11 ; RV32ZVE32F-NEXT: .LBB47_5: ; RV32ZVE32F-NEXT: lw t3, 36(a1) ; RV32ZVE32F-NEXT: lw t4, 32(a1) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: bnez t5, .LBB47_14 +; RV32ZVE32F-NEXT: bnez t5, .LBB47_12 ; RV32ZVE32F-NEXT: .LBB47_6: ; RV32ZVE32F-NEXT: lw t5, 44(a1) ; RV32ZVE32F-NEXT: lw t6, 40(a1) -; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: bnez s0, .LBB47_15 +; RV32ZVE32F-NEXT: j .LBB47_13 ; RV32ZVE32F-NEXT: .LBB47_7: -; RV32ZVE32F-NEXT: lw s0, 52(a1) -; RV32ZVE32F-NEXT: lw s1, 48(a1) -; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: bnez t0, .LBB47_16 -; RV32ZVE32F-NEXT: .LBB47_8: -; RV32ZVE32F-NEXT: lw t0, 60(a1) -; RV32ZVE32F-NEXT: lw a1, 56(a1) -; RV32ZVE32F-NEXT: j .LBB47_17 -; RV32ZVE32F-NEXT: .LBB47_9: ; RV32ZVE32F-NEXT: lw a2, 4(a1) ; RV32ZVE32F-NEXT: lw a3, 0(a1) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB47_2 -; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load1 +; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 @@ -4048,7 +4032,7 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB47_3 -; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load4 +; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 @@ -4056,7 +4040,7 @@ ; RV32ZVE32F-NEXT: lw a7, 0(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB47_4 -; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load7 +; 
RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 @@ -4064,7 +4048,7 @@ ; RV32ZVE32F-NEXT: lw t2, 0(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB47_5 -; RV32ZVE32F-NEXT: .LBB47_13: # %cond.load10 +; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 @@ -4072,29 +4056,45 @@ ; RV32ZVE32F-NEXT: lw t4, 0(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB47_6 -; RV32ZVE32F-NEXT: .LBB47_14: # %cond.load13 +; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 ; RV32ZVE32F-NEXT: lw t5, 4(t6) ; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: .LBB47_13: # %else14 +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: beqz s0, .LBB47_7 -; RV32ZVE32F-NEXT: .LBB47_15: # %cond.load16 +; RV32ZVE32F-NEXT: beqz s0, .LBB47_16 +; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 ; RV32ZVE32F-NEXT: lw s0, 4(s1) ; RV32ZVE32F-NEXT: lw s1, 0(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: beqz t0, .LBB47_8 -; RV32ZVE32F-NEXT: .LBB47_16: # %cond.load19 +; RV32ZVE32F-NEXT: bnez t0, .LBB47_17 +; RV32ZVE32F-NEXT: .LBB47_15: +; RV32ZVE32F-NEXT: lw t0, 60(a1) +; RV32ZVE32F-NEXT: lw a1, 56(a1) +; RV32ZVE32F-NEXT: j .LBB47_18 +; RV32ZVE32F-NEXT: .LBB47_16: +; RV32ZVE32F-NEXT: lw s0, 52(a1) +; RV32ZVE32F-NEXT: lw s1, 48(a1) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB47_15 +; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: lw t0, 4(a1) ; RV32ZVE32F-NEXT: lw a1, 0(a1) -; RV32ZVE32F-NEXT: .LBB47_17: # %else20 +; RV32ZVE32F-NEXT: .LBB47_18: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) ; RV32ZVE32F-NEXT: sw a2, 4(a0) ; RV32ZVE32F-NEXT: sw a5, 8(a0) @@ -4227,12 +4227,6 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -4240,54 +4234,44 @@ ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB48_9 +; RV32ZVE32F-NEXT: beqz a1, .LBB48_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB48_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB48_8 ; 
RV32ZVE32F-NEXT: .LBB48_2: ; RV32ZVE32F-NEXT: lw a4, 12(a2) ; RV32ZVE32F-NEXT: lw a5, 8(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB48_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB48_9 ; RV32ZVE32F-NEXT: .LBB48_3: ; RV32ZVE32F-NEXT: lw a6, 20(a2) ; RV32ZVE32F-NEXT: lw a7, 16(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, .LBB48_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB48_10 ; RV32ZVE32F-NEXT: .LBB48_4: ; RV32ZVE32F-NEXT: lw t1, 28(a2) ; RV32ZVE32F-NEXT: lw t2, 24(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB48_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB48_11 ; RV32ZVE32F-NEXT: .LBB48_5: ; RV32ZVE32F-NEXT: lw t3, 36(a2) ; RV32ZVE32F-NEXT: lw t4, 32(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: bnez t5, .LBB48_14 +; RV32ZVE32F-NEXT: bnez t5, .LBB48_12 ; RV32ZVE32F-NEXT: .LBB48_6: ; RV32ZVE32F-NEXT: lw t5, 44(a2) ; RV32ZVE32F-NEXT: lw t6, 40(a2) -; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: bnez s0, .LBB48_15 +; RV32ZVE32F-NEXT: j .LBB48_13 ; RV32ZVE32F-NEXT: .LBB48_7: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) -; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: bnez t0, .LBB48_16 -; RV32ZVE32F-NEXT: .LBB48_8: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) -; RV32ZVE32F-NEXT: j .LBB48_17 -; RV32ZVE32F-NEXT: .LBB48_9: ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a3, 0(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB48_2 -; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load1 +; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 @@ -4295,7 +4279,7 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB48_3 -; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load4 +; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 @@ -4303,7 +4287,7 @@ ; RV32ZVE32F-NEXT: lw a7, 0(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB48_4 -; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load7 +; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 @@ -4311,7 +4295,7 @@ ; RV32ZVE32F-NEXT: lw t2, 0(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB48_5 -; RV32ZVE32F-NEXT: .LBB48_13: # %cond.load10 +; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 @@ -4319,29 +4303,45 @@ ; RV32ZVE32F-NEXT: lw t4, 0(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB48_6 -; RV32ZVE32F-NEXT: .LBB48_14: # %cond.load13 +; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 ; RV32ZVE32F-NEXT: lw t5, 4(t6) ; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: .LBB48_13: # %else14 +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: beqz s0, 
.LBB48_7 -; RV32ZVE32F-NEXT: .LBB48_15: # %cond.load16 +; RV32ZVE32F-NEXT: beqz s0, .LBB48_16 +; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 ; RV32ZVE32F-NEXT: lw s0, 4(s1) ; RV32ZVE32F-NEXT: lw s1, 0(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: beqz t0, .LBB48_8 -; RV32ZVE32F-NEXT: .LBB48_16: # %cond.load19 +; RV32ZVE32F-NEXT: bnez t0, .LBB48_17 +; RV32ZVE32F-NEXT: .LBB48_15: +; RV32ZVE32F-NEXT: lw t0, 60(a2) +; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: j .LBB48_18 +; RV32ZVE32F-NEXT: .LBB48_16: +; RV32ZVE32F-NEXT: lw s0, 52(a2) +; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB48_15 +; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) -; RV32ZVE32F-NEXT: .LBB48_17: # %else20 +; RV32ZVE32F-NEXT: .LBB48_18: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) ; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a5, 8(a0) @@ -4505,12 +4505,6 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -4518,54 +4512,44 @@ ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB49_9 +; RV32ZVE32F-NEXT: beqz a1, .LBB49_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB49_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB49_8 ; RV32ZVE32F-NEXT: .LBB49_2: ; RV32ZVE32F-NEXT: lw a4, 12(a2) ; RV32ZVE32F-NEXT: lw a5, 8(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB49_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB49_9 ; RV32ZVE32F-NEXT: .LBB49_3: ; RV32ZVE32F-NEXT: lw a6, 20(a2) ; RV32ZVE32F-NEXT: lw a7, 16(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, .LBB49_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB49_10 ; RV32ZVE32F-NEXT: .LBB49_4: ; RV32ZVE32F-NEXT: lw t1, 28(a2) ; RV32ZVE32F-NEXT: lw t2, 24(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB49_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB49_11 ; RV32ZVE32F-NEXT: .LBB49_5: ; RV32ZVE32F-NEXT: lw t3, 36(a2) ; RV32ZVE32F-NEXT: lw t4, 32(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: bnez t5, .LBB49_14 +; RV32ZVE32F-NEXT: bnez t5, .LBB49_12 ; RV32ZVE32F-NEXT: .LBB49_6: ; RV32ZVE32F-NEXT: lw t5, 44(a2) ; RV32ZVE32F-NEXT: lw t6, 40(a2) -; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: bnez s0, .LBB49_15 +; RV32ZVE32F-NEXT: j .LBB49_13 ; RV32ZVE32F-NEXT: .LBB49_7: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) -; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: bnez t0, .LBB49_16 -; RV32ZVE32F-NEXT: .LBB49_8: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) -; 
RV32ZVE32F-NEXT: j .LBB49_17 -; RV32ZVE32F-NEXT: .LBB49_9: ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a3, 0(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB49_2 -; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load1 +; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 @@ -4573,7 +4557,7 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB49_3 -; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load4 +; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 @@ -4581,7 +4565,7 @@ ; RV32ZVE32F-NEXT: lw a7, 0(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB49_4 -; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load7 +; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 @@ -4589,7 +4573,7 @@ ; RV32ZVE32F-NEXT: lw t2, 0(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB49_5 -; RV32ZVE32F-NEXT: .LBB49_13: # %cond.load10 +; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 @@ -4597,29 +4581,45 @@ ; RV32ZVE32F-NEXT: lw t4, 0(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB49_6 -; RV32ZVE32F-NEXT: .LBB49_14: # %cond.load13 +; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 ; RV32ZVE32F-NEXT: lw t5, 4(t6) ; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: .LBB49_13: # %else14 +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: beqz s0, .LBB49_7 -; RV32ZVE32F-NEXT: .LBB49_15: # %cond.load16 +; RV32ZVE32F-NEXT: beqz s0, .LBB49_16 +; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 ; RV32ZVE32F-NEXT: lw s0, 4(s1) ; RV32ZVE32F-NEXT: lw s1, 0(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: beqz t0, .LBB49_8 -; RV32ZVE32F-NEXT: .LBB49_16: # %cond.load19 +; RV32ZVE32F-NEXT: bnez t0, .LBB49_17 +; RV32ZVE32F-NEXT: .LBB49_15: +; RV32ZVE32F-NEXT: lw t0, 60(a2) +; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: j .LBB49_18 +; RV32ZVE32F-NEXT: .LBB49_16: +; RV32ZVE32F-NEXT: lw s0, 52(a2) +; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB49_15 +; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) -; RV32ZVE32F-NEXT: .LBB49_17: # %else20 +; RV32ZVE32F-NEXT: .LBB49_18: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) ; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a5, 8(a0) @@ -4784,12 +4784,6 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; 
RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -4797,54 +4791,44 @@ ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB50_9 +; RV32ZVE32F-NEXT: beqz a1, .LBB50_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB50_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB50_8 ; RV32ZVE32F-NEXT: .LBB50_2: ; RV32ZVE32F-NEXT: lw a4, 12(a2) ; RV32ZVE32F-NEXT: lw a5, 8(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB50_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB50_9 ; RV32ZVE32F-NEXT: .LBB50_3: ; RV32ZVE32F-NEXT: lw a6, 20(a2) ; RV32ZVE32F-NEXT: lw a7, 16(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, .LBB50_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB50_10 ; RV32ZVE32F-NEXT: .LBB50_4: ; RV32ZVE32F-NEXT: lw t1, 28(a2) ; RV32ZVE32F-NEXT: lw t2, 24(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB50_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB50_11 ; RV32ZVE32F-NEXT: .LBB50_5: ; RV32ZVE32F-NEXT: lw t3, 36(a2) ; RV32ZVE32F-NEXT: lw t4, 32(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: bnez t5, .LBB50_14 +; RV32ZVE32F-NEXT: bnez t5, .LBB50_12 ; RV32ZVE32F-NEXT: .LBB50_6: ; RV32ZVE32F-NEXT: lw t5, 44(a2) ; RV32ZVE32F-NEXT: lw t6, 40(a2) -; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: bnez s0, .LBB50_15 +; RV32ZVE32F-NEXT: j .LBB50_13 ; RV32ZVE32F-NEXT: .LBB50_7: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) -; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: bnez t0, .LBB50_16 -; RV32ZVE32F-NEXT: .LBB50_8: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) -; RV32ZVE32F-NEXT: j .LBB50_17 -; RV32ZVE32F-NEXT: .LBB50_9: ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a3, 0(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB50_2 -; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load1 +; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 @@ -4852,7 +4836,7 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB50_3 -; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load4 +; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 @@ -4860,7 +4844,7 @@ ; RV32ZVE32F-NEXT: lw a7, 0(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB50_4 -; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load7 +; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 @@ -4868,7 +4852,7 @@ ; RV32ZVE32F-NEXT: lw t2, 0(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB50_5 -; RV32ZVE32F-NEXT: .LBB50_13: # %cond.load10 +; RV32ZVE32F-NEXT: 
.LBB50_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 @@ -4876,29 +4860,45 @@ ; RV32ZVE32F-NEXT: lw t4, 0(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB50_6 -; RV32ZVE32F-NEXT: .LBB50_14: # %cond.load13 +; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 ; RV32ZVE32F-NEXT: lw t5, 4(t6) ; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: .LBB50_13: # %else14 +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: beqz s0, .LBB50_7 -; RV32ZVE32F-NEXT: .LBB50_15: # %cond.load16 +; RV32ZVE32F-NEXT: beqz s0, .LBB50_16 +; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 ; RV32ZVE32F-NEXT: lw s0, 4(s1) ; RV32ZVE32F-NEXT: lw s1, 0(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: beqz t0, .LBB50_8 -; RV32ZVE32F-NEXT: .LBB50_16: # %cond.load19 +; RV32ZVE32F-NEXT: bnez t0, .LBB50_17 +; RV32ZVE32F-NEXT: .LBB50_15: +; RV32ZVE32F-NEXT: lw t0, 60(a2) +; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: j .LBB50_18 +; RV32ZVE32F-NEXT: .LBB50_16: +; RV32ZVE32F-NEXT: lw s0, 52(a2) +; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB50_15 +; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) -; RV32ZVE32F-NEXT: .LBB50_17: # %else20 +; RV32ZVE32F-NEXT: .LBB50_18: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) ; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a5, 8(a0) @@ -5071,12 +5071,6 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -5084,54 +5078,44 @@ ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_9 +; RV32ZVE32F-NEXT: beqz a1, .LBB51_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB51_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB51_8 ; RV32ZVE32F-NEXT: .LBB51_2: ; RV32ZVE32F-NEXT: lw a4, 12(a2) ; RV32ZVE32F-NEXT: lw a5, 8(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB51_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB51_9 ; RV32ZVE32F-NEXT: .LBB51_3: ; RV32ZVE32F-NEXT: lw a6, 20(a2) ; RV32ZVE32F-NEXT: lw a7, 16(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, 
.LBB51_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB51_10 ; RV32ZVE32F-NEXT: .LBB51_4: ; RV32ZVE32F-NEXT: lw t1, 28(a2) ; RV32ZVE32F-NEXT: lw t2, 24(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB51_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB51_11 ; RV32ZVE32F-NEXT: .LBB51_5: ; RV32ZVE32F-NEXT: lw t3, 36(a2) ; RV32ZVE32F-NEXT: lw t4, 32(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: bnez t5, .LBB51_14 +; RV32ZVE32F-NEXT: bnez t5, .LBB51_12 ; RV32ZVE32F-NEXT: .LBB51_6: ; RV32ZVE32F-NEXT: lw t5, 44(a2) ; RV32ZVE32F-NEXT: lw t6, 40(a2) -; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: bnez s0, .LBB51_15 +; RV32ZVE32F-NEXT: j .LBB51_13 ; RV32ZVE32F-NEXT: .LBB51_7: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) -; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: bnez t0, .LBB51_16 -; RV32ZVE32F-NEXT: .LBB51_8: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) -; RV32ZVE32F-NEXT: j .LBB51_17 -; RV32ZVE32F-NEXT: .LBB51_9: ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a3, 0(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB51_2 -; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load1 +; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 @@ -5139,7 +5123,7 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB51_3 -; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load4 +; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 @@ -5147,7 +5131,7 @@ ; RV32ZVE32F-NEXT: lw a7, 0(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB51_4 -; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load7 +; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 @@ -5155,7 +5139,7 @@ ; RV32ZVE32F-NEXT: lw t2, 0(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB51_5 -; RV32ZVE32F-NEXT: .LBB51_13: # %cond.load10 +; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 @@ -5163,29 +5147,45 @@ ; RV32ZVE32F-NEXT: lw t4, 0(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB51_6 -; RV32ZVE32F-NEXT: .LBB51_14: # %cond.load13 +; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 ; RV32ZVE32F-NEXT: lw t5, 4(t6) ; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: .LBB51_13: # %else14 +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: beqz s0, .LBB51_7 -; RV32ZVE32F-NEXT: .LBB51_15: # %cond.load16 +; RV32ZVE32F-NEXT: beqz s0, .LBB51_16 +; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 ; RV32ZVE32F-NEXT: lw s0, 4(s1) ; RV32ZVE32F-NEXT: lw s1, 0(s1) ; RV32ZVE32F-NEXT: andi t0, 
t0, -128 -; RV32ZVE32F-NEXT: beqz t0, .LBB51_8 -; RV32ZVE32F-NEXT: .LBB51_16: # %cond.load19 +; RV32ZVE32F-NEXT: bnez t0, .LBB51_17 +; RV32ZVE32F-NEXT: .LBB51_15: +; RV32ZVE32F-NEXT: lw t0, 60(a2) +; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: j .LBB51_18 +; RV32ZVE32F-NEXT: .LBB51_16: +; RV32ZVE32F-NEXT: lw s0, 52(a2) +; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB51_15 +; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) -; RV32ZVE32F-NEXT: .LBB51_17: # %else20 +; RV32ZVE32F-NEXT: .LBB51_18: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) ; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a5, 8(a0) @@ -5350,12 +5350,6 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -5363,54 +5357,44 @@ ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB52_9 +; RV32ZVE32F-NEXT: beqz a1, .LBB52_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB52_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB52_8 ; RV32ZVE32F-NEXT: .LBB52_2: ; RV32ZVE32F-NEXT: lw a4, 12(a2) ; RV32ZVE32F-NEXT: lw a5, 8(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB52_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB52_9 ; RV32ZVE32F-NEXT: .LBB52_3: ; RV32ZVE32F-NEXT: lw a6, 20(a2) ; RV32ZVE32F-NEXT: lw a7, 16(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, .LBB52_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB52_10 ; RV32ZVE32F-NEXT: .LBB52_4: ; RV32ZVE32F-NEXT: lw t1, 28(a2) ; RV32ZVE32F-NEXT: lw t2, 24(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB52_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB52_11 ; RV32ZVE32F-NEXT: .LBB52_5: ; RV32ZVE32F-NEXT: lw t3, 36(a2) ; RV32ZVE32F-NEXT: lw t4, 32(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: bnez t5, .LBB52_14 +; RV32ZVE32F-NEXT: bnez t5, .LBB52_12 ; RV32ZVE32F-NEXT: .LBB52_6: ; RV32ZVE32F-NEXT: lw t5, 44(a2) ; RV32ZVE32F-NEXT: lw t6, 40(a2) -; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: bnez s0, .LBB52_15 +; RV32ZVE32F-NEXT: j .LBB52_13 ; RV32ZVE32F-NEXT: .LBB52_7: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) -; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: bnez t0, .LBB52_16 -; RV32ZVE32F-NEXT: .LBB52_8: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) -; RV32ZVE32F-NEXT: j .LBB52_17 -; RV32ZVE32F-NEXT: .LBB52_9: ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a3, 0(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB52_2 -; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load1 +; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi 
v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 @@ -5418,7 +5402,7 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB52_3 -; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load4 +; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 @@ -5426,7 +5410,7 @@ ; RV32ZVE32F-NEXT: lw a7, 0(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB52_4 -; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load7 +; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 @@ -5434,7 +5418,7 @@ ; RV32ZVE32F-NEXT: lw t2, 0(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB52_5 -; RV32ZVE32F-NEXT: .LBB52_13: # %cond.load10 +; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 @@ -5442,29 +5426,45 @@ ; RV32ZVE32F-NEXT: lw t4, 0(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB52_6 -; RV32ZVE32F-NEXT: .LBB52_14: # %cond.load13 +; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 ; RV32ZVE32F-NEXT: lw t5, 4(t6) ; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: .LBB52_13: # %else14 +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: beqz s0, .LBB52_7 -; RV32ZVE32F-NEXT: .LBB52_15: # %cond.load16 +; RV32ZVE32F-NEXT: beqz s0, .LBB52_16 +; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 ; RV32ZVE32F-NEXT: lw s0, 4(s1) ; RV32ZVE32F-NEXT: lw s1, 0(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: beqz t0, .LBB52_8 -; RV32ZVE32F-NEXT: .LBB52_16: # %cond.load19 +; RV32ZVE32F-NEXT: bnez t0, .LBB52_17 +; RV32ZVE32F-NEXT: .LBB52_15: +; RV32ZVE32F-NEXT: lw t0, 60(a2) +; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: j .LBB52_18 +; RV32ZVE32F-NEXT: .LBB52_16: +; RV32ZVE32F-NEXT: lw s0, 52(a2) +; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB52_15 +; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) -; RV32ZVE32F-NEXT: .LBB52_17: # %else20 +; RV32ZVE32F-NEXT: .LBB52_18: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) ; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a5, 8(a0) @@ -5630,12 +5630,6 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 
v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -5643,54 +5637,44 @@ ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB53_9 +; RV32ZVE32F-NEXT: beqz a1, .LBB53_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB53_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB53_8 ; RV32ZVE32F-NEXT: .LBB53_2: ; RV32ZVE32F-NEXT: lw a4, 12(a2) ; RV32ZVE32F-NEXT: lw a5, 8(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB53_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB53_9 ; RV32ZVE32F-NEXT: .LBB53_3: ; RV32ZVE32F-NEXT: lw a6, 20(a2) ; RV32ZVE32F-NEXT: lw a7, 16(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, .LBB53_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB53_10 ; RV32ZVE32F-NEXT: .LBB53_4: ; RV32ZVE32F-NEXT: lw t1, 28(a2) ; RV32ZVE32F-NEXT: lw t2, 24(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB53_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB53_11 ; RV32ZVE32F-NEXT: .LBB53_5: ; RV32ZVE32F-NEXT: lw t3, 36(a2) ; RV32ZVE32F-NEXT: lw t4, 32(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: bnez t5, .LBB53_14 +; RV32ZVE32F-NEXT: bnez t5, .LBB53_12 ; RV32ZVE32F-NEXT: .LBB53_6: ; RV32ZVE32F-NEXT: lw t5, 44(a2) ; RV32ZVE32F-NEXT: lw t6, 40(a2) -; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: bnez s0, .LBB53_15 +; RV32ZVE32F-NEXT: j .LBB53_13 ; RV32ZVE32F-NEXT: .LBB53_7: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) -; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: bnez t0, .LBB53_16 -; RV32ZVE32F-NEXT: .LBB53_8: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) -; RV32ZVE32F-NEXT: j .LBB53_17 -; RV32ZVE32F-NEXT: .LBB53_9: ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a3, 0(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB53_2 -; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load1 +; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 @@ -5698,7 +5682,7 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB53_3 -; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load4 +; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 @@ -5706,7 +5690,7 @@ ; RV32ZVE32F-NEXT: lw a7, 0(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB53_4 -; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load7 +; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 @@ -5714,7 +5698,7 @@ ; RV32ZVE32F-NEXT: lw t2, 0(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB53_5 -; RV32ZVE32F-NEXT: .LBB53_13: # %cond.load10 +; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 @@ -5722,29 +5706,45 @@ ; RV32ZVE32F-NEXT: lw t4, 0(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB53_6 -; RV32ZVE32F-NEXT: .LBB53_14: # %cond.load13 +; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13 ; 
RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 ; RV32ZVE32F-NEXT: lw t5, 4(t6) ; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: .LBB53_13: # %else14 +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: andi s0, t0, 64 -; RV32ZVE32F-NEXT: beqz s0, .LBB53_7 -; RV32ZVE32F-NEXT: .LBB53_15: # %cond.load16 +; RV32ZVE32F-NEXT: beqz s0, .LBB53_16 +; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 ; RV32ZVE32F-NEXT: lw s0, 4(s1) ; RV32ZVE32F-NEXT: lw s1, 0(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 -; RV32ZVE32F-NEXT: beqz t0, .LBB53_8 -; RV32ZVE32F-NEXT: .LBB53_16: # %cond.load19 +; RV32ZVE32F-NEXT: bnez t0, .LBB53_17 +; RV32ZVE32F-NEXT: .LBB53_15: +; RV32ZVE32F-NEXT: lw t0, 60(a2) +; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: j .LBB53_18 +; RV32ZVE32F-NEXT: .LBB53_16: +; RV32ZVE32F-NEXT: lw s0, 52(a2) +; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB53_15 +; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) -; RV32ZVE32F-NEXT: .LBB53_17: # %else20 +; RV32ZVE32F-NEXT: .LBB53_18: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) ; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a5, 8(a0) @@ -5919,66 +5919,50 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset s0, -4 -; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB54_9 +; RV32ZVE32F-NEXT: beqz a1, .LBB54_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB54_10 +; RV32ZVE32F-NEXT: bnez a4, .LBB54_8 ; RV32ZVE32F-NEXT: .LBB54_2: ; RV32ZVE32F-NEXT: lw a4, 12(a2) ; RV32ZVE32F-NEXT: lw a5, 8(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 -; RV32ZVE32F-NEXT: bnez a6, .LBB54_11 +; RV32ZVE32F-NEXT: bnez a6, .LBB54_9 ; RV32ZVE32F-NEXT: .LBB54_3: ; RV32ZVE32F-NEXT: lw a6, 20(a2) ; RV32ZVE32F-NEXT: lw a7, 16(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 -; RV32ZVE32F-NEXT: bnez t1, .LBB54_12 +; RV32ZVE32F-NEXT: bnez t1, .LBB54_10 ; RV32ZVE32F-NEXT: .LBB54_4: ; RV32ZVE32F-NEXT: lw t1, 28(a2) ; RV32ZVE32F-NEXT: lw t2, 24(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 -; RV32ZVE32F-NEXT: bnez t3, .LBB54_13 +; RV32ZVE32F-NEXT: bnez t3, .LBB54_11 ; RV32ZVE32F-NEXT: .LBB54_5: ; RV32ZVE32F-NEXT: lw t3, 36(a2) ; RV32ZVE32F-NEXT: lw t4, 32(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 -; RV32ZVE32F-NEXT: 
bnez t5, .LBB54_14
+; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
; RV32ZVE32F-NEXT: .LBB54_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
-; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: bnez s0, .LBB54_15
+; RV32ZVE32F-NEXT: j .LBB54_13
; RV32ZVE32F-NEXT: .LBB54_7:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB54_16
-; RV32ZVE32F-NEXT: .LBB54_8:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
-; RV32ZVE32F-NEXT: j .LBB54_17
-; RV32ZVE32F-NEXT: .LBB54_9:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
-; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load1
+; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
@@ -5986,7 +5970,7 @@
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
-; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load4
+; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
@@ -5994,7 +5978,7 @@
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
-; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load7
+; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
@@ -6002,7 +5986,7 @@
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
-; RV32ZVE32F-NEXT: .LBB54_13: # %cond.load10
+; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
@@ -6010,29 +5994,45 @@
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
-; RV32ZVE32F-NEXT: .LBB54_14: # %cond.load13
+; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: .LBB54_13: # %else14
+; RV32ZVE32F-NEXT: addi sp, sp, -16
+; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: .cfi_offset s0, -4
+; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB54_7
-; RV32ZVE32F-NEXT: .LBB54_15: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB54_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB54_8
-; RV32ZVE32F-NEXT: .LBB54_16: # %cond.load19
+; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
+; RV32ZVE32F-NEXT: .LBB54_15:
+; RV32ZVE32F-NEXT: lw t0, 60(a2)
+; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: j .LBB54_18
+; RV32ZVE32F-NEXT: .LBB54_16:
+; RV32ZVE32F-NEXT: lw s0, 52(a2)
+; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
+; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
-; RV32ZVE32F-NEXT: .LBB54_17: # %else20
+; RV32ZVE32F-NEXT: .LBB54_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
@@ -6192,66 +6192,50 @@
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -16
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: .cfi_offset s0, -4
-; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
-; RV32ZVE32F-NEXT: beqz a1, .LBB55_9
+; RV32ZVE32F-NEXT: beqz a1, .LBB55_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB55_10
+; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
; RV32ZVE32F-NEXT: .LBB55_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB55_11
+; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
; RV32ZVE32F-NEXT: .LBB55_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB55_12
+; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
; RV32ZVE32F-NEXT: .LBB55_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB55_13
+; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
; RV32ZVE32F-NEXT: .LBB55_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB55_14
+; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
; RV32ZVE32F-NEXT: .LBB55_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
-; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: bnez s0, .LBB55_15
+; RV32ZVE32F-NEXT: j .LBB55_13
; RV32ZVE32F-NEXT: .LBB55_7:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB55_16
-; RV32ZVE32F-NEXT: .LBB55_8:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
-; RV32ZVE32F-NEXT: j .LBB55_17
-; RV32ZVE32F-NEXT: .LBB55_9:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
-; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load1
+; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
@@ -6259,7 +6243,7 @@
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
-; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load4
+; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
@@ -6267,7 +6251,7 @@
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
-; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load7
+; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
@@ -6275,7 +6259,7 @@
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
-; RV32ZVE32F-NEXT: .LBB55_13: # %cond.load10
+; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
@@ -6283,29 +6267,45 @@
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
-; RV32ZVE32F-NEXT: .LBB55_14: # %cond.load13
+; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: .LBB55_13: # %else14
+; RV32ZVE32F-NEXT: addi sp, sp, -16
+; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: .cfi_offset s0, -4
+; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB55_7
-; RV32ZVE32F-NEXT: .LBB55_15: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB55_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB55_8
-; RV32ZVE32F-NEXT: .LBB55_16: # %cond.load19
+; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
+; RV32ZVE32F-NEXT: .LBB55_15:
+; RV32ZVE32F-NEXT: lw t0, 60(a2)
+; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: j .LBB55_18
+; RV32ZVE32F-NEXT: .LBB55_16:
+; RV32ZVE32F-NEXT: lw s0, 52(a2)
+; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
+; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
-; RV32ZVE32F-NEXT: .LBB55_17: # %else20
+; RV32ZVE32F-NEXT: .LBB55_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
@@ -6466,66 +6466,50 @@
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -16
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: .cfi_offset s0, -4
-; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
-; RV32ZVE32F-NEXT: beqz a1, .LBB56_9
+; RV32ZVE32F-NEXT: beqz a1, .LBB56_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB56_10
+; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
; RV32ZVE32F-NEXT: .LBB56_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB56_11
+; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
; RV32ZVE32F-NEXT: .LBB56_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB56_12
+; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
; RV32ZVE32F-NEXT: .LBB56_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB56_13
+; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
; RV32ZVE32F-NEXT: .LBB56_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB56_14
+; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
; RV32ZVE32F-NEXT: .LBB56_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
-; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: bnez s0, .LBB56_15
+; RV32ZVE32F-NEXT: j .LBB56_13
; RV32ZVE32F-NEXT: .LBB56_7:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB56_16
-; RV32ZVE32F-NEXT: .LBB56_8:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
-; RV32ZVE32F-NEXT: j .LBB56_17
-; RV32ZVE32F-NEXT: .LBB56_9:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
-; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load1
+; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
@@ -6533,7 +6517,7 @@
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
-; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load4
+; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
@@ -6541,7 +6525,7 @@
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
-; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load7
+; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
@@ -6549,7 +6533,7 @@
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
-; RV32ZVE32F-NEXT: .LBB56_13: # %cond.load10
+; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
@@ -6557,29 +6541,45 @@
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
-; RV32ZVE32F-NEXT: .LBB56_14: # %cond.load13
+; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: .LBB56_13: # %else14
+; RV32ZVE32F-NEXT: addi sp, sp, -16
+; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: .cfi_offset s0, -4
+; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB56_7
-; RV32ZVE32F-NEXT: .LBB56_15: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB56_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB56_8
-; RV32ZVE32F-NEXT: .LBB56_16: # %cond.load19
+; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
+; RV32ZVE32F-NEXT: .LBB56_15:
+; RV32ZVE32F-NEXT: lw t0, 60(a2)
+; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: j .LBB56_18
+; RV32ZVE32F-NEXT: .LBB56_16:
+; RV32ZVE32F-NEXT: lw s0, 52(a2)
+; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
+; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
-; RV32ZVE32F-NEXT: .LBB56_17: # %else20
+; RV32ZVE32F-NEXT: .LBB56_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
@@ -6748,12 +6748,6 @@
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: addi sp, sp, -16
-; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: .cfi_offset s0, -4
-; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: lw a4, 56(a2)
; RV32ZVE32F-NEXT: lw a5, 48(a2)
; RV32ZVE32F-NEXT: lw a6, 40(a2)
@@ -6776,54 +6770,44 @@
; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
-; RV32ZVE32F-NEXT: beqz a1, .LBB57_9
+; RV32ZVE32F-NEXT: beqz a1, .LBB57_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB57_10
+; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
; RV32ZVE32F-NEXT: .LBB57_2:
; RV32ZVE32F-NEXT: lw a4, 12(a3)
; RV32ZVE32F-NEXT: lw a5, 8(a3)
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB57_11
+; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
; RV32ZVE32F-NEXT: .LBB57_3:
; RV32ZVE32F-NEXT: lw a6, 20(a3)
; RV32ZVE32F-NEXT: lw a7, 16(a3)
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB57_12
+; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
; RV32ZVE32F-NEXT: .LBB57_4:
; RV32ZVE32F-NEXT: lw t1, 28(a3)
; RV32ZVE32F-NEXT: lw t2, 24(a3)
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB57_13
+; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
; RV32ZVE32F-NEXT: .LBB57_5:
; RV32ZVE32F-NEXT: lw t3, 36(a3)
; RV32ZVE32F-NEXT: lw t4, 32(a3)
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB57_14
+; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
; RV32ZVE32F-NEXT: .LBB57_6:
; RV32ZVE32F-NEXT: lw t5, 44(a3)
; RV32ZVE32F-NEXT: lw t6, 40(a3)
-; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: bnez s0, .LBB57_15
+; RV32ZVE32F-NEXT: j .LBB57_13
; RV32ZVE32F-NEXT: .LBB57_7:
-; RV32ZVE32F-NEXT: lw s0, 52(a3)
-; RV32ZVE32F-NEXT: lw s1, 48(a3)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB57_16
-; RV32ZVE32F-NEXT: .LBB57_8:
-; RV32ZVE32F-NEXT: lw t0, 60(a3)
-; RV32ZVE32F-NEXT: lw a3, 56(a3)
-; RV32ZVE32F-NEXT: j .LBB57_17
-; RV32ZVE32F-NEXT: .LBB57_9:
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
-; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load1
+; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
@@ -6831,7 +6815,7 @@
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
-; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load4
+; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
@@ -6839,7 +6823,7 @@
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
-; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load7
+; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
@@ -6847,7 +6831,7 @@
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
-; RV32ZVE32F-NEXT: .LBB57_13: # %cond.load10
+; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
@@ -6855,29 +6839,45 @@
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
-; RV32ZVE32F-NEXT: .LBB57_14: # %cond.load13
+; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: .LBB57_13: # %else14
+; RV32ZVE32F-NEXT: addi sp, sp, -16
+; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: .cfi_offset s0, -4
+; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB57_7
-; RV32ZVE32F-NEXT: .LBB57_15: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB57_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB57_8
-; RV32ZVE32F-NEXT: .LBB57_16: # %cond.load19
+; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
+; RV32ZVE32F-NEXT: .LBB57_15:
+; RV32ZVE32F-NEXT: lw t0, 60(a3)
+; RV32ZVE32F-NEXT: lw a3, 56(a3)
+; RV32ZVE32F-NEXT: j .LBB57_18
+; RV32ZVE32F-NEXT: .LBB57_16:
+; RV32ZVE32F-NEXT: lw s0, 52(a3)
+; RV32ZVE32F-NEXT: lw s1, 48(a3)
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
+; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw t0, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
-; RV32ZVE32F-NEXT: .LBB57_17: # %else20
+; RV32ZVE32F-NEXT: .LBB57_18: # %else20
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -199,9 +199,12 @@
define arm_aapcs_vfpcc float @fast_float_mac(ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
; CHECK-LABEL: fast_float_mac:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: vldreq s0, .LCPI1_0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: cbz r2, .LBB1_4
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: mov.w r12, #1
; CHECK-NEXT: bic r3, r3, #3
@@ -227,11 +230,8 @@
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vadd.f32 q0, q0, r0
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .LBB1_4:
-; CHECK-NEXT: vldr s0, .LCPI1_0
-; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: @ %bb.4:
; CHECK-NEXT: .LCPI1_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
entry:
@@ -274,12 +274,14 @@
define arm_aapcs_vfpcc float @fast_float_half_mac(ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
; CHECK-LABEL: fast_float_half_mac:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: vldreq s0, .LCPI2_0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB2_1: @ %vector.ph
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: beq.w .LBB2_20
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q5, #0x0
; CHECK-NEXT: bic r3, r3, #3
@@ -430,15 +432,11 @@
; CHECK-NEXT: vadd.f32 q0, q0, q1
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vadd.f32 q0, q0, r0
-; CHECK-NEXT: b .LBB2_21
-; CHECK-NEXT: .LBB2_20:
-; CHECK-NEXT: vldr s0, .LCPI2_0
-; CHECK-NEXT: .LBB2_21: @ %for.cond.cleanup
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.22:
+; CHECK-NEXT: @ %bb.20:
; CHECK-NEXT: .LCPI2_1:
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 1 @ 0x1
diff --git a/llvm/test/CodeGen/X86/MachineSink-eflags.ll b/llvm/test/CodeGen/X86/MachineSink-eflags.ll
--- a/llvm/test/CodeGen/X86/MachineSink-eflags.ll
+++ b/llvm/test/CodeGen/X86/MachineSink-eflags.ll
@@ -14,7 +14,6 @@
define void @foo(ptr nocapture %_stubArgs) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $152, %rsp
; CHECK-NEXT: movq 48(%rdi), %rax
; CHECK-NEXT: movl 64(%rdi), %ecx
; CHECK-NEXT: movl $200, %esi
@@ -33,6 +32,7 @@
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: movaps (%rax,%rdx), %xmm0
; CHECK-NEXT: .LBB0_3: # %entry
+; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rsp
; CHECK-NEXT: movaps (%rax,%rcx), %xmm1
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: jne .LBB0_5
diff --git a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll
--- a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll
@@ -4,8 +4,6 @@
define i32 @test1(i32 %x) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: #APP
; CHECK-NEXT: .quad .Ltmp0
; CHECK-NEXT: .quad .LBB0_1
@@ -13,12 +11,14 @@
; CHECK-NEXT: .LBB0_1: # Block address taken
; CHECK-NEXT: # %bar
; CHECK-NEXT: # Label of block must be emitted
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq foo@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Ltmp0: # Block address taken
; CHECK-NEXT: # %bb.2: # %baz
; CHECK-NEXT: movl %eax, %edi
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: jmp mux@PLT # TAILCALL
entry:
callbr void asm sideeffect ".quad ${0:l}\0A\09.quad ${1:l}", "i,!i,~{dirflag},~{fpsr},~{flags}"(ptr blockaddress(@test1, %baz))
diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -96,6 +96,17 @@
define void @_Z2x6v() local_unnamed_addr {
; CHECK-LABEL: _Z2x6v:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
+; CHECK-NEXT: movl (%rax), %esi
+; CHECK-NEXT: andl $511, %esi # imm = 0x1FF
+; CHECK-NEXT: leaq 1(%rsi), %rax
+; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl %eax, (%rcx)
+; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl (%rcx), %edx
+; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: je .LBB1_18
+; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
@@ -114,17 +125,6 @@
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
-; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl (%rax), %esi
-; CHECK-NEXT: andl $511, %esi # imm = 0x1FF
-; CHECK-NEXT: leaq 1(%rsi), %rax
-; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: movl %eax, (%rcx)
-; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: movl (%rcx), %edx
-; CHECK-NEXT: testl %edx, %edx
-; CHECK-NEXT: je .LBB1_18
-; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movq (%rcx), %rdi
; CHECK-NEXT: movl %edx, %ecx
@@ -255,7 +255,6 @@
; CHECK-NEXT: movq %rcx, (%rax)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl $0, (%rax)
-; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
@@ -268,6 +267,13 @@
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_restore %rbx
+; CHECK-NEXT: .cfi_restore %r12
+; CHECK-NEXT: .cfi_restore %r13
+; CHECK-NEXT: .cfi_restore %r14
+; CHECK-NEXT: .cfi_restore %r15
+; CHECK-NEXT: .cfi_restore %rbp
+; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: retq
entry:
%0 = load i32, ptr @x1, align 4
diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
--- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -17,18 +17,16 @@
define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
; X32-LABEL: program_1:
; X32: ## %bb.0: ## %entry
-; X32-NEXT: pushl %esi
-; X32-NEXT: subl $88, %esp
; X32-NEXT: cmpl $0, 0
; X32-NEXT: jle LBB0_2
; X32-NEXT: ## %bb.1: ## %forcond
; X32-NEXT: cmpl $0, 0
; X32-NEXT: jg LBB0_3
; X32-NEXT: LBB0_2: ## %ifthen
-; X32-NEXT: addl $88, %esp
-; X32-NEXT: popl %esi
; X32-NEXT: retl
; X32-NEXT: LBB0_3: ## %forbody
+; X32-NEXT: pushl %esi
+; X32-NEXT: subl $88, %esp
; X32-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
; X32-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X32-NEXT: cvttps2dq %xmm1, %xmm0
@@ -101,18 +99,16 @@
;
; X64-LABEL: program_1:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: pushq %rbx
-; X64-NEXT: subq $64, %rsp
; X64-NEXT: cmpl $0, 0
; X64-NEXT: jle LBB0_2
; X64-NEXT: ## %bb.1: ## %forcond
; X64-NEXT: cmpl $0, 0
; X64-NEXT: jg LBB0_3
; X64-NEXT: LBB0_2: ## %ifthen
-; X64-NEXT: addq $64, %rsp
-; X64-NEXT: popq %rbx
; X64-NEXT: retq
; X64-NEXT: LBB0_3: ## %forbody
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $64, %rsp
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
diff --git a/llvm/test/CodeGen/X86/fp128-select.ll b/llvm/test/CodeGen/X86/fp128-select.ll
--- a/llvm/test/CodeGen/X86/fp128-select.ll
+++ b/llvm/test/CodeGen/X86/fp128-select.ll
@@ -13,7 +13,7 @@
; SSE: # %bb.0:
; SSE-NEXT: testl %edx, %edx
; SSE-NEXT: jne .LBB0_1
-; SSE-NEXT: # %bb.2:
+; SSE-NEXT: # %bb.3:
; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movaps %xmm0, (%rsi)
; SSE-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll b/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll
--- a/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll
@@ -20,8 +20,6 @@
define i32 @eflagsLiveInPrologue() #0 {
; ENABLE-LABEL: eflagsLiveInPrologue:
; ENABLE: ## %bb.0: ## %entry
-; ENABLE-NEXT: pushl %esi
-; ENABLE-NEXT: subl $8, %esp
; ENABLE-NEXT: movl L_a$non_lazy_ptr, %eax
; ENABLE-NEXT: cmpl $0, (%eax)
; ENABLE-NEXT: je LBB0_2
@@ -37,6 +35,8 @@
; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: jmp LBB0_3
; ENABLE-NEXT: LBB0_4: ## %for.end
+; ENABLE-NEXT: pushl %esi
+; ENABLE-NEXT: subl $8, %esp
; ENABLE-NEXT: xorl %edx, %edx
; ENABLE-NEXT: cmpb $0, _d
; ENABLE-NEXT: movl $6, %ecx
diff --git a/llvm/test/CodeGen/X86/inline-asm-flag-output.ll b/llvm/test/CodeGen/X86/inline-asm-flag-output.ll
--- a/llvm/test/CodeGen/X86/inline-asm-flag-output.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-flag-output.ll
@@ -888,15 +888,15 @@
;
; X64-LABEL: test_cca_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jbe .LBB28_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB28_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -928,15 +928,15 @@
;
; X64-LABEL: test_ccae_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jb .LBB29_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB29_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -968,15 +968,15 @@
;
; X64-LABEL: test_ccb_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jae .LBB30_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB30_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1008,15 +1008,15 @@
;
; X64-LABEL: test_ccbe_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: ja .LBB31_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB31_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1048,15 +1048,15 @@
;
; X64-LABEL: test_ccc_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jae .LBB32_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB32_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1088,15 +1088,15 @@
;
; X64-LABEL: test_cce_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jne .LBB33_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB33_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1128,15 +1128,15 @@
;
; X64-LABEL: test_ccz_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jne .LBB34_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB34_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1168,15 +1168,15 @@
;
; X64-LABEL: test_ccg_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jle .LBB35_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB35_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1208,15 +1208,15 @@
;
; X64-LABEL: test_ccge_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jl .LBB36_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB36_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1248,15 +1248,15 @@
;
; X64-LABEL: test_ccl_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jge .LBB37_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB37_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1288,15 +1288,15 @@
;
; X64-LABEL: test_ccle_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jg .LBB38_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB38_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1328,15 +1328,15 @@
;
; X64-LABEL: test_ccna_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: ja .LBB39_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB39_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1368,15 +1368,15 @@
;
; X64-LABEL: test_ccnae_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jae .LBB40_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB40_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1408,15 +1408,15 @@
;
; X64-LABEL: test_ccnb_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jb .LBB41_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB41_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1448,15 +1448,15 @@
;
; X64-LABEL: test_ccnbe_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jbe .LBB42_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB42_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1488,15 +1488,15 @@
;
; X64-LABEL: test_ccnc_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jb .LBB43_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB43_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1528,15 +1528,15 @@
;
; X64-LABEL: test_ccne_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: je .LBB44_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB44_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1568,15 +1568,15 @@
;
; X64-LABEL: test_ccnz_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: je .LBB45_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB45_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1608,15 +1608,15 @@
;
; X64-LABEL: test_ccng_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jg .LBB46_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB46_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1648,15 +1648,15 @@
;
; X64-LABEL: test_ccnge_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jge .LBB47_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB47_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1688,15 +1688,15 @@
;
; X64-LABEL: test_ccnl_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jl .LBB48_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB48_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1728,15 +1728,15 @@
;
; X64-LABEL: test_ccnle_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jle .LBB49_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB49_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1768,15 +1768,15 @@
;
; X64-LABEL: test_ccno_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jo .LBB50_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB50_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1808,15 +1808,15 @@
;
; X64-LABEL: test_ccnp_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jp .LBB51_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB51_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1848,15 +1848,15 @@
;
; X64-LABEL: test_ccns_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: js .LBB52_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB52_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1888,15 +1888,15 @@
;
; X64-LABEL: test_cco_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jno .LBB53_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB53_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1928,15 +1928,15 @@
;
; X64-LABEL: test_ccp_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jnp .LBB54_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB54_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1968,15 +1968,15 @@
;
; X64-LABEL: test_ccs_branch:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
; X64-NEXT: #APP
; X64-NEXT: cmp %rdi,(%rsi)
; X64-NEXT: #NO_APP
; X64-NEXT: jns .LBB55_2
; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: pushq %rax
; X64-NEXT: callq bar@PLT
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB55_2: # %exit
-; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%cc = tail call i8 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
diff --git a/llvm/test/CodeGen/X86/pr56103.ll b/llvm/test/CodeGen/X86/pr56103.ll
--- a/llvm/test/CodeGen/X86/pr56103.ll
+++ b/llvm/test/CodeGen/X86/pr56103.ll
@@ -11,7 +11,6 @@
define dso_local i32 @main() nounwind {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq e@GOTPCREL(%rip), %rax
; CHECK-NEXT: movw $1, (%rax)
; CHECK-NEXT: movq b@GOTPCREL(%rip), %rax
@@ -32,9 +31,9 @@
; CHECK-NEXT: jle .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_2: # %if.then
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq abort@PLT
entry:
store i16 1, ptr @e, align 2
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -64,8 +64,6 @@
;
; CHECK-X64-LABEL: fail:
; CHECK-X64: # %bb.0:
-; CHECK-X64-NEXT: pushq %rax
-; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
; CHECK-X64-NEXT: je .LBB1_3
; CHECK-X64-NEXT: # %bb.1:
@@ -75,10 +73,12 @@
; CHECK-X64-NEXT: testb $1, %al
; CHECK-X64-NEXT: jne .LBB1_3
; CHECK-X64-NEXT: # %bb.2: # %no
+; CHECK-X64-NEXT: pushq %rax
+; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
; CHECK-X64-NEXT: callq bar@PLT
-; CHECK-X64-NEXT: .LBB1_3: # %yes
; CHECK-X64-NEXT: popq %rax
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
+; CHECK-X64-NEXT: .LBB1_3: # %yes
; CHECK-X64-NEXT: retq
%1 = icmp eq <2 x i8> %b,
%2 = extractelement <2 x i1> %1, i32 1
diff --git a/llvm/test/CodeGen/X86/xchg-nofold.ll b/llvm/test/CodeGen/X86/xchg-nofold.ll
--- a/llvm/test/CodeGen/X86/xchg-nofold.ll
+++ b/llvm/test/CodeGen/X86/xchg-nofold.ll
@@ -9,7 +9,6 @@
define zeroext i1 @_Z3fooRSt6atomicIbEb(ptr nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind {
; CHECK-LABEL: _Z3fooRSt6atomicIbEb:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: movq %rdi, %rcx
; CHECK-NEXT: shrq $3, %rcx
@@ -25,9 +24,9 @@
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: xchgb %cl, (%rdi)
; CHECK-NEXT: # kill: def $al killed $al killed $eax
-; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __asan_report_store1@PLT
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -7,7 +7,7 @@
; CHECK-LABEL: test1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #40000
+; CHECK-NEXT: mov w10, #40000 // =0x9c40
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: add x9, x9, x10
; CHECK-NEXT: cmp w8, w1
@@ -47,7 +47,7 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz x0, .LBB1_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
-; CHECK-NEXT: mov w9, #40000
+; CHECK-NEXT: mov w9, #40000 // =0x9c40
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: add x9, x0, x9
; CHECK-NEXT: cmp w8, w1
@@ -89,7 +89,7 @@
; CHECK-NEXT: csel x9, x1, x0, ne
; CHECK-NEXT: cbz x9, .LBB2_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
-; CHECK-NEXT: mov w10, #40000
+; CHECK-NEXT: mov w10, #40000 // =0x9c40
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: add x9, x9, x10
; CHECK-NEXT: cmp w8, w3
@@ -151,7 +151,7 @@
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: mov w20, wzr
-; CHECK-NEXT: mov w21, #40000
+; CHECK-NEXT: mov w21, #40000 // =0x9c40
; CHECK-NEXT: .LBB3_1: // %while_cond
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: .Ltmp0:
@@ -261,18 +261,19 @@
define void @test_invariant_group(i32 %arg, i1 %c) {
; CHECK-LABEL: test_invariant_group:
; CHECK: // %bb.0: // %bb
-; CHECK-NEXT: tbz w1, #0, .LBB5_3
+; CHECK-NEXT: tbz w1, #0, .LBB5_4
; CHECK-NEXT: // %bb.1: // %bb6
-; CHECK-NEXT: cbz w0, .LBB5_4
+; CHECK-NEXT: cbz w0, .LBB5_3
; CHECK-NEXT: .LBB5_2: // %bb1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: tbnz w1, #0, .LBB5_2
-; CHECK-NEXT: .LBB5_3: // %bb5
+; CHECK-NEXT: b .LBB5_4
+; CHECK-NEXT: .LBB5_3: // %bb2
+; CHECK-NEXT: tbz w1, #0, .LBB5_5
+; CHECK-NEXT: .LBB5_4: // %bb5
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_4: // %bb2
-; CHECK-NEXT: tbnz w1, #0, .LBB5_3
-; CHECK-NEXT: // %bb.5: // %bb4
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: .LBB5_5: // %bb4
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: str x8, [x8]
; CHECK-NEXT: ret
bb: