diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -38,6 +38,10 @@
   bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
                             int &FrameIdx) const override;
 
+  // Update DestReg to have the value of SrcReg plus an Offset.
+  void adjustReg(MachineBasicBlock::iterator II, Register DestReg,
+                 Register SrcReg, StackOffset Offset) const;
+
   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -161,6 +161,55 @@
   return true;
 }
 
+void RISCVRegisterInfo::adjustReg(MachineBasicBlock::iterator II, Register DestReg,
+                                  Register SrcReg, StackOffset Offset) const {
+
+  if (DestReg == SrcReg && !Offset.getFixed() && !Offset.getScalable())
+    return;
+
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  const RISCVInstrInfo *TII = ST.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  bool SrcRegIsKill = false;
+
+  if (Offset.getScalable()) {
+    unsigned ScalableAdjOpc = RISCV::ADD;
+    int64_t ScalableValue = Offset.getScalable();
+    if (ScalableValue < 0) {
+      ScalableValue = -ScalableValue;
+      ScalableAdjOpc = RISCV::SUB;
+    }
+    // Get vlenb and multiply vlen with the number of vector registers.
+    TII->getVLENFactoredAmount(MF, MBB, II, DL, DestReg, ScalableValue);
+    BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
+        .addReg(SrcReg).addReg(DestReg, RegState::Kill);
+    SrcReg = DestReg;
+    SrcRegIsKill = true;
+  }
+
+  if (Offset.getFixed()) {
+    // TODO: Merge this with FrameLowering's adjustReg, which knows a few
+    // more tricks than this does for fixed offsets.
+    if (isInt<12>(Offset.getFixed())) {
+      BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), DestReg)
+          .addReg(SrcReg, getKillRegState(SrcRegIsKill))
+          .addImm(Offset.getFixed());
+    } else {
+      Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+      TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed());
+      BuildMI(MBB, II, DL, TII->get(RISCV::ADD), DestReg)
+          .addReg(SrcReg, getKillRegState(SrcRegIsKill))
+          .addReg(ScratchReg, RegState::Kill);
+    }
+  }
+}
+
+
 bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
@@ -199,108 +248,46 @@
         "Frame offsets outside of the signed 32-bit range not supported");
   }
 
-  MachineBasicBlock &MBB = *MI.getParent();
-  bool FrameRegIsKill = false;
-
-  // If the instruction is an ADDI, we can use it's destination as a scratch
-  // register. Load instructions might have an FP or vector destination and
-  // stores don't have a destination register.
-  Register DestReg;
-  if (MI.getOpcode() == RISCV::ADDI)
-    DestReg = MI.getOperand(0).getReg();
-
-  // If required, pre-compute the scalable factor amount which will be used in
-  // later offset computation. Since this sequence requires up to two scratch
-  // registers -- after which one is made free -- this grants us better
-  // scavenging of scratch registers as only up to two are live at one time,
-  // rather than three.
-  unsigned ScalableAdjOpc = RISCV::ADD;
-  if (Offset.getScalable()) {
-    int64_t ScalableValue = Offset.getScalable();
-    if (ScalableValue < 0) {
-      ScalableValue = -ScalableValue;
-      ScalableAdjOpc = RISCV::SUB;
+  if (!IsRVVSpill) {
+    // TODO: Consider always storing the low bits of the immediate in the
+    // offset so that a large immediate is cheaper to materialize?
+    if (isInt<12>(Offset.getFixed())) {
+      MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
+      Offset = StackOffset::get(0, Offset.getScalable());
+    } else {
+      // Since we're going to materialize the full offset below, clear the
+      // portion encoded in the immediate.
+      MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
     }
-    // Use DestReg if it exists, otherwise create a new register.
-    if (!DestReg)
-      DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-    // Get vlenb and multiply vlen with the number of vector registers.
-    TII->getVLENFactoredAmount(MF, MBB, II, DL, DestReg, ScalableValue);
   }
 
-  if (!isInt<12>(Offset.getFixed())) {
-    // The offset won't fit in an immediate, so use a scratch register instead
-    // Modify Offset and FrameReg appropriately.
-
-    // Reuse destination register if it exists and is not holding a scalable
-    // offset.
-    Register ScratchReg = DestReg;
-    if (!DestReg || Offset.getScalable()) {
-      ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-      // Also save to DestReg if it doesn't exist.
-      if (!DestReg)
-        DestReg = ScratchReg;
-    }
-
-    TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed());
-    BuildMI(MBB, II, DL, TII->get(RISCV::ADD), ScratchReg)
-        .addReg(FrameReg, getKillRegState(FrameRegIsKill))
-        .addReg(ScratchReg, RegState::Kill);
-    // If this was an ADDI and there is no scalable offset, we can remove it.
-    if (MI.getOpcode() == RISCV::ADDI && !Offset.getScalable()) {
-      assert(MI.getOperand(0).getReg() == ScratchReg &&
-             "Expected to have written ADDI destination register");
-      MI.eraseFromParent();
-      return true;
-    }
-
-    Offset = StackOffset::get(0, Offset.getScalable());
-    FrameReg = ScratchReg;
-    FrameRegIsKill = true;
-  }
-
-  // Add in the scalable offset which has already been computed in DestReg.
-  if (Offset.getScalable()) {
-    assert(DestReg && "DestReg should be valid");
-    BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
-        .addReg(FrameReg, getKillRegState(FrameRegIsKill))
-        .addReg(DestReg, RegState::Kill);
-    // If this was an ADDI and there is no fixed offset, we can remove it.
-    if (MI.getOpcode() == RISCV::ADDI && !Offset.getFixed()) {
-      assert(MI.getOperand(0).getReg() == DestReg &&
-             "Expected to have written ADDI destination register");
-      MI.eraseFromParent();
-      return true;
-    }
-    FrameReg = DestReg;
-    FrameRegIsKill = true;
-  }
-
-  // Handle the fixed offset which might be zero.
-  if (IsRVVSpill) {
-    // RVVSpills don't have an immediate. Add an ADDI if the fixed offset is
-    // needed.
-    if (Offset.getFixed()) {
-      // Reuse DestReg if it exists, otherwise create a new register.
-      if (!DestReg)
-        DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-      BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), DestReg)
-          .addReg(FrameReg, getKillRegState(FrameRegIsKill))
-          .addImm(Offset.getFixed());
-      FrameReg = DestReg;
-      FrameRegIsKill = true;
-    }
+  if (Offset.getScalable() || Offset.getFixed()) {
+    Register DestReg;
+    if (MI.getOpcode() == RISCV::ADDI)
+      DestReg = MI.getOperand(0).getReg();
+    else
+      DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+    adjustReg(II, DestReg, FrameReg, Offset);
+    MI.getOperand(FIOperandNum).ChangeToRegister(DestReg, /*IsDef*/false,
                                                  /*IsImp*/false,
                                                  /*IsKill*/true);
   } else {
-    // Otherwise we can replace the original immediate.
-    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
+    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false,
                                                  /*IsImp*/false,
                                                  /*IsKill*/false);
   }
 
-  // Finally, replace the frame index operand.
-  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false,
-                                               FrameRegIsKill);
+  // If after materializing the adjustment, we have a pointless ADDI, remove it
+  if (MI.getOpcode() == RISCV::ADDI &&
+      MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+      MI.getOperand(2).getImm() == 0) {
+    MI.eraseFromParent();
+    return true;
+  }
 
   auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MI.getOpcode());
   if (ZvlssegInfo) {
+    MachineBasicBlock &MBB = *MI.getParent();
     Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
     BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
     uint32_t ShiftAmount = Log2_32(ZvlssegInfo->second);
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -44,10 +44,10 @@
    ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
    ; CHECK-NEXT: $x10 = PseudoReadVLENB
    ; CHECK-NEXT: $x10 = SLLI killed $x10, 1
+   ; CHECK-NEXT: $x10 = SUB $x8, killed $x10
    ; CHECK-NEXT: $x11 = LUI 1048575
    ; CHECK-NEXT: $x11 = ADDIW killed $x11, 1824
-   ; CHECK-NEXT: $x11 = ADD $x8, killed $x11
-   ; CHECK-NEXT: $x10 = SUB killed $x11, killed $x10
+   ; CHECK-NEXT: $x10 = ADD killed $x10, killed $x11
    ; CHECK-NEXT: VS1R_V killed renamable $v8, killed renamable $x10
    ; CHECK-NEXT: $x2 = frame-destroy ADDI $x8, -2048
    ; CHECK-NEXT: $x2 = frame-destroy ADDI killed $x2, -224
diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -92,10 +92,10 @@
    ; CHECK-NEXT: $x10 = PseudoReadVLENB
    ; CHECK-NEXT: $x11 = ADDI killed $x0, 50
    ; CHECK-NEXT: $x10 = MUL killed $x10, killed $x11
+   ; CHECK-NEXT: $x10 = ADD $x2, killed $x10
    ; CHECK-NEXT: $x11 = LUI 1
    ; CHECK-NEXT: $x11 = ADDIW killed $x11, -1888
-   ; CHECK-NEXT: $x11 = ADD $x2, killed $x11
-   ; CHECK-NEXT: $x10 = ADD killed $x11, killed $x10
+   ; CHECK-NEXT: $x10 = ADD killed $x10, killed $x11
    ; CHECK-NEXT: PseudoVSPILL_M1 killed renamable $v25, killed $x10 :: (store unknown-size into %stack.1, align 8)
    ; CHECK-NEXT: renamable $x1 = ADDI $x0, 255
    ; CHECK-NEXT: renamable $x5 = nuw ADDI $x2, 384
@@ -111,9 +111,11 @@
    ; CHECK-NEXT: renamable $x21 = ADDI $x2, 1664
    ; CHECK-NEXT: renamable $x22 = ADDI $x2, 1792
    ; CHECK-NEXT: renamable $x23 = ADDI $x2, 1920
-   ; CHECK-NEXT: $x24 = LUI 1
-   ; CHECK-NEXT: $x24 = ADDIW killed $x24, -2048
-   ; CHECK-NEXT: $x24 = ADD $x2, killed $x24
+   ; CHECK-NEXT: SD killed $x1, $x2, 8 :: (store (s64) into %stack.15)
+   ; CHECK-NEXT: SD killed $x5, $x2, 0 :: (store (s64) into %stack.16)
+   ; CHECK-NEXT: $x11 = LUI 1
+   ; CHECK-NEXT: $x11 = ADDIW killed $x11, -2048
+   ; CHECK-NEXT: $x24 = ADD $x2, killed $x11
    ; CHECK-NEXT: renamable $x25 = ADDI $x2, 128
    ; CHECK-NEXT: renamable $x26 = ADDI $x2, 128
    ; CHECK-NEXT: renamable $x27 = ADDI $x0, 2
@@ -129,18 +131,16 @@
    ; CHECK-NEXT: renamable $x16 = SUB killed renamable $x13, renamable $x13
    ; CHECK-NEXT: dead renamable $x13 = PseudoVSETIVLI 1, 64 /* e8, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
    ; CHECK-NEXT: renamable $x13 = nsw ADDI renamable $x16, -2
-   ; CHECK-NEXT: SD killed $x10, $x2, 8 :: (store (s64) into %stack.15)
-   ; CHECK-NEXT: $x10 = PseudoReadVLENB
-   ; CHECK-NEXT: SD killed $x12, $x2, 0 :: (store (s64) into %stack.16)
-   ; CHECK-NEXT: $x12 = ADDI killed $x0, 50
-   ; CHECK-NEXT: $x10 = MUL killed $x10, killed $x12
-   ; CHECK-NEXT: $x12 = LUI 1
-   ; CHECK-NEXT: $x12 = ADDIW killed $x12, -1888
-   ; CHECK-NEXT: $x12 = ADD $x2, killed $x12
-   ; CHECK-NEXT: $x10 = ADD killed $x12, killed $x10
-   ; CHECK-NEXT: $x12 = LD $x2, 0 :: (load (s64) from %stack.16)
-   ; CHECK-NEXT: renamable $v0 = PseudoVRELOAD_M1 killed $x10 :: (load unknown-size from %stack.1, align 8)
-   ; CHECK-NEXT: $x10 = LD $x2, 8 :: (load (s64) from %stack.15)
+   ; CHECK-NEXT: $x1 = PseudoReadVLENB
+   ; CHECK-NEXT: $x5 = ADDI killed $x0, 50
+   ; CHECK-NEXT: $x1 = MUL killed $x1, killed $x5
+   ; CHECK-NEXT: $x1 = ADD $x2, killed $x1
+   ; CHECK-NEXT: $x5 = LUI 1
+   ; CHECK-NEXT: $x5 = ADDIW killed $x5, -1888
+   ; CHECK-NEXT: $x1 = ADD killed $x1, killed $x5
+   ; CHECK-NEXT: $x5 = LD $x2, 0 :: (load (s64) from %stack.16)
+   ; CHECK-NEXT: renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8)
+   ; CHECK-NEXT: $x1 = LD $x2, 8 :: (load (s64) from %stack.15)
    ; CHECK-NEXT: renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 3 /* e8 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
    ; CHECK-NEXT: renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 3 /* e8 */, implicit $vl, implicit $vtype
    ; CHECK-NEXT: BLT killed renamable $x16, renamable $x27, %bb.2
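
Reviewer note, not part of the patch: the standalone C++ sketch below roughly mirrors the instruction sequence the new RISCVRegisterInfo::adjustReg emits, which may make the updated CHECK lines above easier to follow. The helper name adjustRegSketch, the scratch registers t0/t1, and the printed mnemonics are illustrative assumptions only; the real code goes through getVLENFactoredAmount (which prefers a shift over LI+MUL when the factor is a power of two) and movImm (LUI+ADDIW on RV64) rather than printing assembly.

// adjust_reg_sketch.cpp -- illustrative only, not part of the patch.
// Prints the rough RISC-V sequence adjustReg produces for
// DestReg = SrcReg + (Fixed + Scalable * vlenb).
#include <cstdint>
#include <cstdio>

static bool isInt12(int64_t V) { return V >= -2048 && V <= 2047; }

static void adjustRegSketch(const char *Dest, const char *Src,
                            int64_t Fixed, int64_t Scalable) {
  if (Fixed == 0 && Scalable == 0)
    return; // Matches the early return in adjustReg.

  if (Scalable != 0) {
    // Scalable part first: Dest = vlenb * |Scalable|, then ADD/SUB onto Src.
    // (getVLENFactoredAmount emits a SLLI instead of LI+MUL when the factor
    // is a power of two, as in the SLLI/SUB lines in the tests above.)
    int64_t Amount = Scalable < 0 ? -Scalable : Scalable;
    std::printf("  csrr %s, vlenb\n", Dest);
    std::printf("  li   t0, %lld\n", (long long)Amount);
    std::printf("  mul  %s, %s, t0\n", Dest, Dest);
    std::printf("  %s  %s, %s, %s\n", Scalable < 0 ? "sub" : "add",
                Dest, Src, Dest);
    Src = Dest; // The fixed part is now applied on top of Dest.
  }

  if (Fixed != 0) {
    if (isInt12(Fixed)) {
      // Small fixed offsets fold into a single ADDI.
      std::printf("  addi %s, %s, %lld\n", Dest, Src, (long long)Fixed);
    } else {
      // Large fixed offsets are materialized into a scratch register and added.
      std::printf("  li   t1, %lld\n", (long long)Fixed);
      std::printf("  add  %s, %s, t1\n", Dest, Src);
    }
  }
}

int main() {
  // Roughly the addi-scalable-offset.mir case above: fixed offset -2272
  // (LUI 1048575 + ADDIW 1824) plus -2 * vlenb.
  adjustRegSketch("a0", "s0", -2272, -2);
  return 0;
}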