diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -949,14 +949,35 @@ // reserve BP during register allocation and generate BP access in the PEI // pass due to the inconsistent behavior of the function. // - // The function is changed to use hasVInstructions() as the return value. It - // is not precise, but it can make the register allocation correct. + // The function is changed to check if there is any RVV frame object or any + // RVV instruction. It is not precise, but it can make the register allocation + // correct. // // FIXME: Find a better way to make the decision or revisit the solution in // D103622. // // Refer to https://github.com/llvm/llvm-project/issues/53016. - return MF.getSubtarget<RISCVSubtarget>().hasVInstructions(); + + // There cannot be an RVV frame object without the V/Zve* extensions. + if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions()) + return false; + + // If there is a scalable vector object, there must be an RVV frame object. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) + if (MFI.getStackID(I) == TargetStackID::ScalableVector) + return true; + + // If there is any RVV instruction in the function, there might be an RVV + // frame object. + for (const auto &MBB : MF) + for (const auto &MI : MBB) + for (const auto &MO : MI.uses()) + if (MO.isReg() && MO.getReg() == RISCV::VL) + return true; + + // Otherwise, there should not be any RVV frame object. 
+ return false; } // Not preserve stack space within prologue for outgoing variables when the diff --git a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll --- a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll @@ -71,25 +71,21 @@ define void @foo1(i32* nocapture noundef %p1) { ; CHECK-LABEL: foo1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -192 -; CHECK-NEXT: .cfi_def_cfa_offset 192 -; CHECK-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 168(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -128 +; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: addi s0, sp, 192 +; CHECK-NEXT: addi s0, sp, 128 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: mv s1, sp -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: fld ft0, 0(a0) -; CHECK-NEXT: fsd ft0, 56(s1) # 8-byte Folded Spill -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: addi t0, s1, 64 +; CHECK-NEXT: fsd ft0, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi t0, sp, 64 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: li a1, 2 ; CHECK-NEXT: li a2, 3 @@ -100,17 +96,15 @@ ; CHECK-NEXT: li a7, 8 ; CHECK-NEXT: sd t0, 0(sp) ; CHECK-NEXT: call bar@plt -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: fld ft0, 0(s2) -; CHECK-NEXT: fld ft1, 56(s1) # 8-byte Folded Reload +; CHECK-NEXT: fld ft0, 0(s1) +; CHECK-NEXT: fld ft1, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: fadd.d ft0, ft1, ft0 -; CHECK-NEXT: fsd ft0, 0(s2) -; CHECK-NEXT: addi sp, s0, -192 -; CHECK-NEXT: 
ld ra, 184(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 176(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 168(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 160(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 192 +; CHECK-NEXT: fsd ft0, 0(s1) +; CHECK-NEXT: addi sp, s0, -128 +; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 128 ; CHECK-NEXT: ret entry: %vla = alloca [10 x i32], align 64