Index: llvm/lib/CodeGen/RegAllocGreedy.h
===================================================================
--- llvm/lib/CodeGen/RegAllocGreedy.h
+++ llvm/lib/CodeGen/RegAllocGreedy.h
@@ -154,6 +154,11 @@
   using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
   using SmallLISet = SmallPtrSet<const LiveInterval *, 4>;
 
+  // We need to track all tentative recolorings so we can roll back any
+  // successful and unsuccessful recoloring attempts.
+  using RecoloringStack =
+      SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
+
   // context
   MachineFunction *MF;
 
@@ -351,7 +356,7 @@
 private:
   MCRegister selectOrSplitImpl(const LiveInterval &,
                                SmallVectorImpl<Register> &, SmallVirtRegSet &,
-                               unsigned = 0);
+                               RecoloringStack &, unsigned = 0);
 
   bool LRE_CanEraseVirtReg(Register) override;
   void LRE_WillShrinkVirtReg(Register) override;
@@ -417,9 +422,10 @@
                       SmallVectorImpl<Register> &, const SmallVirtRegSet &);
   unsigned tryLastChanceRecoloring(const LiveInterval &, AllocationOrder &,
                                    SmallVectorImpl<Register> &,
-                                   SmallVirtRegSet &, unsigned);
+                                   SmallVirtRegSet &, RecoloringStack &,
+                                   unsigned);
   bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
-                               SmallVirtRegSet &, unsigned);
+                               SmallVirtRegSet &, RecoloringStack &, unsigned);
   void tryHintRecoloring(const LiveInterval &);
   void tryHintsRecoloring();
 
Index: llvm/lib/CodeGen/RegAllocGreedy.cpp
===================================================================
--- llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1913,6 +1913,10 @@
 /// (split, spill) during the process and that must be assigned.
 /// \p FixedRegisters contains all the virtual registers that cannot be
 /// recolored.
+///
+/// \p RecolorStack tracks the original assignments of successfully recolored
+/// registers.
+///
 /// \p Depth gives the current depth of the last chance recoloring.
 /// \return a physical register that can be used for VirtReg or ~0u if none
 /// exists.
@@ -1920,11 +1924,15 @@
                                            AllocationOrder &Order,
                                            SmallVectorImpl<Register> &NewVRegs,
                                            SmallVirtRegSet &FixedRegisters,
+                                           RecoloringStack &RecolorStack,
                                            unsigned Depth) {
   if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
     return ~0u;
 
   LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+
+  const ssize_t EntryStackSize = RecolorStack.size();
+
   // Ranges must be Done.
   assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
          "Last chance recoloring should really be last chance");
@@ -1940,9 +1948,7 @@
 
   // Set of Live intervals that will need to be recolored.
   SmallLISet RecoloringCandidates;
-  // Record the original mapping virtual register to physical register in case
-  // the recoloring fails.
-  DenseMap<Register, MCRegister> VirtRegToPhysReg;
+
   // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
   // this recoloring "session".
   assert(!FixedRegisters.count(VirtReg.reg()));
@@ -1954,7 +1960,6 @@
     LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                       << printReg(PhysReg, TRI) << '\n');
     RecoloringCandidates.clear();
-    VirtRegToPhysReg.clear();
     CurrentNewVRegs.clear();
 
     // It is only possible to recolor virtual register interference.
@@ -1985,7 +1990,8 @@
              "Interferences are supposed to be with allocated variables");
 
       // Record the current allocation.
-      VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
+      RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
+
       // unset the related struct.
       Matrix->unassign(*RC);
     }
@@ -2000,7 +2006,7 @@
     // at this point for the next physical register.
     SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
     if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
-                                FixedRegisters, Depth)) {
+                                FixedRegisters, RecolorStack, Depth)) {
       // Push the queued vregs into the main queue.
       for (Register NewVReg : CurrentNewVRegs)
         NewVRegs.push_back(NewVReg);
@@ -2027,13 +2033,30 @@
       NewVRegs.push_back(R);
     }
 
-    for (const LiveInterval *RC : RecoloringCandidates) {
-      Register ItVirtReg = RC->reg();
-      if (VRM->hasPhys(ItVirtReg))
-        Matrix->unassign(*RC);
-      MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
-      Matrix->assign(*RC, ItPhysReg);
+    // Roll back our unsuccessful recoloring. Also roll back any successful
+    // recolorings in any recursive recoloring attempts, since it's possible
+    // they would have introduced conflicts with assignments we will be
+    // restoring further up the stack. Perform all unassignments prior to
+    // reassigning, since sub-recolorings may have conflicted with the registers
+    // we are going to restore to their original assignments.
+    for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
+      const LiveInterval *LI;
+      MCRegister PhysReg;
+      std::tie(LI, PhysReg) = RecolorStack[I];
+
+      if (VRM->hasPhys(LI->reg()))
+        Matrix->unassign(*LI);
+    }
+
+    for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
+      const LiveInterval *LI;
+      MCRegister PhysReg;
+      std::tie(LI, PhysReg) = RecolorStack[I];
+      Matrix->assign(*LI, PhysReg);
     }
+
+    // Pop the stack of recoloring attempts.
+    RecolorStack.resize(EntryStackSize);
   }
 
   // Last chance recoloring did not worked either, give up.
@@ -2051,12 +2074,13 @@
 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                        SmallVectorImpl<Register> &NewVRegs,
                                        SmallVirtRegSet &FixedRegisters,
+                                       RecoloringStack &RecolorStack,
                                        unsigned Depth) {
   while (!RecoloringQueue.empty()) {
     const LiveInterval *LI = dequeue(RecoloringQueue);
     LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
-    MCRegister PhysReg =
-        selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
+    MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
+                                           RecolorStack, Depth + 1);
     // When splitting happens, the live-range may actually be empty.
     // In that case, this is okay to continue the recoloring even
     // if we did not find an alternative color for it. Indeed,
@@ -2088,7 +2112,9 @@
   CutOffInfo = CO_None;
   LLVMContext &Ctx = MF->getFunction().getContext();
   SmallVirtRegSet FixedRegisters;
-  MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+  RecoloringStack RecolorStack;
+  MCRegister Reg =
+      selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
   if (Reg == ~0U && (CutOffInfo != CO_None)) {
     uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
     if (CutOffEncountered == CO_Depth)
@@ -2349,6 +2375,7 @@
 MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
                                        SmallVectorImpl<Register> &NewVRegs,
                                        SmallVirtRegSet &FixedRegisters,
+                                       RecoloringStack &RecolorStack,
                                        unsigned Depth) {
   uint8_t CostPerUseLimit = uint8_t(~0u);
   // First try assigning a free register.
@@ -2423,9 +2450,10 @@
 
   // If we couldn't allocate a register from spilling, there is probably some
   // invalid inline assembly. The base class will report it.
-  if (Stage >= RS_Done || !VirtReg.isSpillable())
+  if (Stage >= RS_Done || !VirtReg.isSpillable()) {
     return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
-                                   Depth);
+                                   RecolorStack, Depth);
+  }
 
   // Finally spill VirtReg itself.
   if ((EnableDeferredSpilling ||
Index: llvm/test/CodeGen/AMDGPU/issue48473.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -0,0 +1,81 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - 2> %t.err %s | FileCheck %s
+# RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+# ERR: error: register allocation failed: maximum depth for recoloring reached. Use -fexhaustive-register-search to skip cutoffs
+# ERR-NEXT: error: ran out of registers during register allocation
+
+# This testcase used to fail with an "overlapping insert" assertion
+# when trying to roll back an unsuccessful recoloring of %25. One of
+# the interfering vregs is successfully recolored, and the other is
+# not. We need to roll back the successfully recolored interfering
+# vreg in order to avoid conflicting with the original assignment of
+# the original register when rolling back the second.
+
+
+# %25 initially assigned to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# interfering candidates %15 %17
+# assigned %15 to $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+# %15
+# %18 -> normal recolored $sgpr28_sgpr29_sgpr30_sgpr31
+# %20 -> normal recolored $sgpr60_sgpr61_sgpr62_sgpr63
+# %17 candidates %37 %39
+# tentative assign %17 $sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91
+# %37 to $sgpr72_sgpr73_sgpr74_sgpr75 succeeded
+# %39 last chance recoloring, fails max depth
+# Fail to assign: %17 to $sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 at depth 4
+# %37 reassign to $sgpr84_sgpr85_sgpr86_sgpr87 unassign from $sgpr72_sgpr73_sgpr74_sgpr75
+# %39 reassign to $sgpr88_sgpr89_sgpr90_sgpr91
+# %17 candidates %39 %41
+# Try to assign: %17 to $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+# %39 Try assign to $sgpr72_sgpr73_sgpr74_sgpr75 succeeded
+# %41 last chance recoloring, fail max depth
+# %39 reassign to $sgpr88_sgpr89_sgpr90_sgpr91 unassign from $sgpr72_sgpr73_sgpr74_sgpr75
+# %41 reassign to $sgpr92_sgpr93_sgpr94_sgpr95
+# %17 candidates %41 %16
+# Try assign %41 to $sgpr72_sgpr73_sgpr74_sgpr75 succeeded
+# %16 last chance recolor, fail max depth
+# fail to recolor %17
+#
+# Have to roll back the successful recoloring of %15 when %17's
+# recoloring failed. Previously we would leave the recoloring of %18
+# and %20 in place. The recoloring of %20 to
+# $sgpr60_sgpr61_sgpr62_sgpr63 conflicts with the parent restore of
+# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+
+# CHECK-LABEL: name: issue48473
+# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+
+---
+name: issue48473
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103'
+  stackPtrOffsetReg: '$sgpr32'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr4' }
+  occupancy: 20
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    %2:sgpr_256 = S_LOAD_DWORDX8_IMM undef %3:sgpr_64, 1000, 0 :: (load 32, addrspace 6)
+    %4:sgpr_256 = S_LOAD_DWORDX8_IMM undef %3:sgpr_64, 1088, 0 :: (load 32, addrspace 6)
+    %5:sgpr_256 = S_LOAD_DWORDX8_IMM undef %3:sgpr_64, 1152, 0 :: (load 32, addrspace 6)
+    %6:sgpr_256 = S_LOAD_DWORDX8_IMM undef %3:sgpr_64, 1216, 0 :: (load 32, addrspace 6)
+    %7:sgpr_256 = S_LOAD_DWORDX8_IMM undef %3:sgpr_64, 1280, 0 :: (load 32, addrspace 6)
+    %8:sgpr_256 = S_LOAD_DWORDX8_IMM undef %3:sgpr_64, 1408, 0 :: (load 32, addrspace 6)
+    %9:sgpr_128 = S_LOAD_DWORDX4_IMM undef %3:sgpr_64, 0, 0 :: (load 16, addrspace 6)
+    %10:sgpr_128 = S_LOAD_DWORDX4_IMM undef %3:sgpr_64, 0, 0 :: (load 16, addrspace 6)
+    %11:sgpr_128 = S_LOAD_DWORDX4_IMM undef %3:sgpr_64, 0, 0 :: (load 16, addrspace 6)
+    %12:sgpr_128 = S_LOAD_DWORDX4_IMM undef %3:sgpr_64, 0, 0 :: (load 16, addrspace 6)
+    %13:sgpr_128 = S_LOAD_DWORDX4_IMM undef %3:sgpr_64, 0, 0 :: (load 16, addrspace 6)
+    %14:sgpr_128 = IMPLICIT_DEF
+    S_NOP 0, implicit-def %15:sgpr_256, implicit-def %16:sgpr_128, implicit-def %17:sgpr_256
+    S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %4, implicit %9, implicit %5, implicit %10, implicit %6, implicit %11, implicit %8, implicit %13, implicit %7, implicit %12, implicit %17, implicit %17, implicit %16, implicit %14, implicit %15
+    S_ENDPGM 0
+
+...
Index: llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
@@ -0,0 +1,61 @@
+# RUN: not llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
+# RUN: not --crash llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
+
+# FIXME: We should not produce a verifier error after erroring
+
+# ERR: error: inline assembly requires more registers than available
+# VERIFIER: *** Bad machine code: Using an undefined physical register ***
+
+# This testcase cannot be compiled with the enforced register
+# budget. Previously, tryLastChanceRecoloring would assert here. It
+# was attempting to recolor a superregister with an overlapping
+# subregister over the same range.
+
+--- |
+  define void @foo() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+
+...
+---
+name: foo
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vreg_512 }
+  - { id: 3, class: vreg_256 }
+  - { id: 4, class: vreg_128 }
+  - { id: 5, class: vreg_96 }
+  - { id: 6, class: vreg_96 }
+  - { id: 7, class: vreg_512 }
+  - { id: 8, class: vreg_256 }
+  - { id: 9, class: vreg_128 }
+  - { id: 10, class: vreg_96 }
+  - { id: 11, class: vreg_96 }
+  - { id: 12, class: sreg_64 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: vgpr_32 }
+machineFunctionInfo:
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg: '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body: |
+  bb.0 (%ir-block.0):
+
+    INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $agpr0
+    %14:vgpr_32 = COPY killed $agpr0
+    INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 11534346 /* regdef:VReg_512 */, def %7, 10158090 /* regdef:VReg_256 */, def %8, 4784138 /* regdef:VReg_128 */, def %9, 3670026 /* regdef:VReg_96 */, def %10, 3670026 /* regdef:VReg_96 */, def %11
+    INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 11534345 /* reguse:VReg_512 */, %7
+    INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 10158089 /* reguse:VReg_256 */, %8
+    INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, %9
+    INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %10
+    INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %11
+    $agpr1 = COPY %14
+    INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $agpr1
+    SI_RETURN
+
+...
Index: llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+f,+m,+zfh,+experimental-zvfh < %s | FileCheck %s
+
+; This testcase failed to compile after
+; c46aab01c002b7a04135b8b7f1f52d8c9ae23a58, which was reverted.
+
+; FIXME: The failure does not reproduce with -stop-before=greedy
+; output MIR with -start-before=greedy
+
+define void @last_chance_recoloring_failure() {
+; CHECK-LABEL: last_chance_recoloring_failure:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    li a0, 55
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vloxseg2ei32.v v8, (a0), v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vs4r.v v12, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmclr.m v0
+; CHECK-NEXT:    li s0, 36
+; CHECK-NEXT:    vsetvli zero, s0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwadd.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    call func
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vrgather.vv v4, v8, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, s0, e16, m4, ta, mu
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 2
+; CHECK-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    vl4r.v v28, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfwsub.wv v16, v8, v24
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vssubu.vv v4, v4, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, s0, e32, m8, tu, mu
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfdiv.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vse32.v v8, (a0)
+entry:
+  %i = call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.riscv.vloxseg2.nxv16f16.nxv16i32.i64(half* nonnull poison, <vscale x 16 x i32> poison, i64 55)
+  %i1 = extractvalue { <vscale x 16 x half>, <vscale x 16 x half> } %i, 0
+  %i2 = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> zeroinitializer, i64 36, i64 0)
+  call void @func()
+  %i3 = call <vscale x 16 x i16> @llvm.riscv.vrgather.vv.mask.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> poison, <vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i64 32, i64 0)
+  %i4 = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16.i64(<vscale x 16 x float> poison, <vscale x 16 x float> %i2, <vscale x 16 x half> %i1, i64 36)
+  %i5 = call <vscale x 16 x i16> @llvm.riscv.vssubu.mask.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> %i3, <vscale x 16 x i16> %i3, <vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i64 32, i64 0)
+  %i6 = call <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.nxv16f32.i64(<vscale x 16 x float> %i4, <vscale x 16 x float> %i2, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i64 36, i64 0)
+  call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %i6, <vscale x 16 x float>* nonnull poison, i64 36)
+  ret void
+}
+
+declare void @func()
+declare { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.riscv.vloxseg2.nxv16f16.nxv16i32.i64(half* nocapture, <vscale x 16 x i32>, i64)
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float>, <vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i64, i64 immarg)
+declare <vscale x 16 x i16> @llvm.riscv.vrgather.vv.mask.nxv16i16.i64(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i64, i64 immarg)
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16.i64(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x half>, i64)
+declare <vscale x 16 x i16> @llvm.riscv.vssubu.mask.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i64, i64 immarg)
+declare <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i64, i64 immarg)
+declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64) #3