diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -213,6 +213,24 @@
   MachineBasicBlock &MBB = *MI.getParent();
   bool FrameRegIsKill = false;
 
+  // If required, pre-compute the scalable factor amount which will be used in
+  // later offset computation. Since this sequence requires up to two scratch
+  // registers -- after which one is made free -- this grants us better
+  // scavenging of scratch registers as only up to two are live at one time,
+  // rather than three.
+  Register ScalableFactorRegister;
+  unsigned ScalableAdjOpc = RISCV::ADD;
+  if (Offset.getScalable()) {
+    int64_t ScalableValue = Offset.getScalable();
+    if (ScalableValue < 0) {
+      ScalableValue = -ScalableValue;
+      ScalableAdjOpc = RISCV::SUB;
+    }
+    // 1. Get vlenb && multiply vlen with the number of vector registers.
+    ScalableFactorRegister =
+        TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue);
+  }
+
   if (!isInt<12>(Offset.getFixed())) {
     // The offset won't fit in an immediate, so use a scratch register instead
     // Modify Offset and FrameReg appropriately
@@ -251,29 +269,22 @@
     }
   } else {
     // Offset = (fixed offset, scalable offset)
-    unsigned Opc = RISCV::ADD;
-    int64_t ScalableValue = Offset.getScalable();
-    if (ScalableValue < 0) {
-      ScalableValue = -ScalableValue;
-      Opc = RISCV::SUB;
-    }
-
-    // 1. Get vlenb && multiply vlen with number of vector register.
-    Register FactorRegister =
-        TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue);
+    // Step 1, the scalable offset, has already been computed.
+    assert(ScalableFactorRegister &&
+           "Expected pre-computation of scalable factor in earlier step");
 
     // 2. Calculate address: FrameReg + result of multiply
     if (MI.getOpcode() == RISCV::ADDI && !Offset.getFixed()) {
-      BuildMI(MBB, II, DL, TII->get(Opc), MI.getOperand(0).getReg())
+      BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), MI.getOperand(0).getReg())
           .addReg(FrameReg, getKillRegState(FrameRegIsKill))
-          .addReg(FactorRegister, RegState::Kill);
+          .addReg(ScalableFactorRegister, RegState::Kill);
       MI.eraseFromParent();
       return;
     }
 
     Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-    BuildMI(MBB, II, DL, TII->get(Opc), VL)
+    BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), VL)
         .addReg(FrameReg, getKillRegState(FrameRegIsKill))
-        .addReg(FactorRegister, RegState::Kill);
+        .addReg(ScalableFactorRegister, RegState::Kill);
 
     if (isRVV && Offset.getFixed()) {
       // Scalable load/store has no immediate argument.
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -40,10 +40,10 @@
     ; CHECK: $x2 = SUB $x2, killed $x12
     ; CHECK: dead renamable $x11 = PseudoVSETVLI killed renamable $x11, 88, implicit-def $vl, implicit-def $vtype
     ; CHECK: renamable $v25 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 64, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
+    ; CHECK: $x11 = PseudoReadVLENB
    ; CHECK: $x10 = LUI 1048575
    ; CHECK: $x10 = ADDIW killed $x10, 1824
    ; CHECK: $x10 = ADD $x8, killed $x10
-    ; CHECK: $x11 = PseudoReadVLENB
    ; CHECK: $x10 = SUB killed $x10, killed $x11
    ; CHECK: VS1R_V killed renamable $v25, killed renamable $x10
    ; CHECK: $x10 = PseudoReadVLENB
diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -90,12 +90,12 @@
    ; CHECK: $x2 = ANDI $x2, -128
    ; CHECK: dead renamable $x15 = PseudoVSETIVLI 1, 72, implicit-def $vl, implicit-def $vtype
    ; CHECK: renamable $v25 = PseudoVMV_V_X_M1 killed renamable $x12, $noreg, 16, implicit $vl, implicit $vtype
+    ; CHECK: $x11 = PseudoReadVLENB
+    ; CHECK: $x10 = ADDI $x0, 50
+    ; CHECK: $x11 = MUL killed $x11, killed $x10
    ; CHECK: $x10 = LUI 1
    ; CHECK: $x10 = ADDIW killed $x10, -1896
    ; CHECK: $x10 = ADD $x2, killed $x10
-    ; CHECK: $x11 = PseudoReadVLENB
-    ; CHECK: $x12 = ADDI $x0, 50
-    ; CHECK: $x11 = MUL killed $x11, killed $x12
    ; CHECK: $x10 = ADD killed $x10, killed $x11
    ; CHECK: PseudoVSPILL_M1 killed renamable $v25, killed $x10 :: (store unknown-size into %stack.1, align 8)
    ; CHECK: renamable $x1 = ADDI $x0, 255
@@ -133,25 +133,25 @@
    ; CHECK: renamable $x9 = SRLI killed renamable $x9, 62
    ; CHECK: renamable $x9 = ADD renamable $x13, killed renamable $x9
    ; CHECK: renamable $x9 = ANDI killed renamable $x9, -4
-    ; CHECK: renamable $x16 = SUB killed renamable $x13, killed renamable $x9
+    ; CHECK: renamable $x16 = SUB killed renamable $x13, renamable $x9
    ; CHECK: dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
    ; CHECK: renamable $x13 = nsw ADDI renamable $x16, -2
-    ; CHECK: $x5 = LUI 1
-    ; CHECK: $x9 = ADDIW killed $x5, -1896
-    ; CHECK: $x9 = ADD $x2, killed $x9
-    ; CHECK: $x1 = PseudoReadVLENB
-    ; CHECK: $x5 = ADDI $x0, 50
-    ; CHECK: $x1 = MUL killed $x1, killed $x5
+    ; CHECK: $x5 = PseudoReadVLENB
+    ; CHECK: $x1 = ADDI $x0, 50
+    ; CHECK: $x5 = MUL killed $x5, killed $x1
+    ; CHECK: $x1 = LUI 1
+    ; CHECK: $x1 = ADDIW killed $x1, -1896
+    ; CHECK: $x1 = ADD $x2, killed $x1
+    ; CHECK: $x1 = ADD killed $x1, killed $x5
    ; CHECK: $x5 = LD $x2, 0 :: (load 8 from %stack.17)
-    ; CHECK: $x9 = ADD killed $x9, killed $x1
+    ; CHECK: renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8)
    ; CHECK: $x1 = LD $x2, 8 :: (load 8 from %stack.16)
-    ; CHECK: renamable $v0 = PseudoVRELOAD_M1 killed $x9 :: (load unknown-size from %stack.1, align 8)
    ; CHECK: renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 8, implicit $vl, implicit $vtype
    ; CHECK: renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 8, implicit $vl, implicit $vtype
    ; CHECK: BLT killed renamable $x16, renamable $x27, %bb.2
    ; CHECK: bb.1:
    ; CHECK:   successors: %bb.2(0x80000000)
-    ; CHECK:   liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
+    ; CHECK:   liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
    ; CHECK: renamable $x9 = COPY killed renamable $x13
    ; CHECK: PseudoBR %bb.2
    ; CHECK: bb.2:
@@ -212,7 +212,7 @@
     renamable $x9 = SRLI killed renamable $x9, 62
     renamable $x9 = ADD renamable $x13, killed renamable $x9
     renamable $x9 = ANDI killed renamable $x9, -4
-    renamable $x16 = SUB killed renamable $x13, killed renamable $x9
+    renamable $x16 = SUB killed renamable $x13, renamable $x9
     dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
     renamable $x13 = nsw ADDI renamable $x16, -2
     renamable $v0 = PseudoVRELOAD_M1 %stack.1 :: (load unknown-size from %stack.1, align 8)
@@ -222,7 +222,7 @@
 
   bb.1:
     successors: %bb.2
-    liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
+    liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
 
     renamable $x9 = COPY killed renamable $x13
     PseudoBR %bb.2
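
For reference, the reordering the updated RISCVRegisterInfo.cpp code produces can be read directly off the emergency-slot.mir CHECK lines above. The listing below is only an illustrative rearrangement of those lines; the particular scratch registers ($x10/$x11/$x12) are simply what the register scavenger picked in that test, not something the patch guarantees.

    ; Before: the fixed offset is materialized first, so its scratch register
    ; stays live across the whole vlenb read/multiply sequence, and a third
    ; scratch register is needed at the peak.
    $x10 = LUI 1
    $x10 = ADDIW killed $x10, -1896
    $x10 = ADD $x2, killed $x10
    $x11 = PseudoReadVLENB
    $x12 = ADDI $x0, 50
    $x11 = MUL killed $x11, killed $x12
    $x10 = ADD killed $x10, killed $x11

    ; After: the vlenb-factored amount is pre-computed up front, freeing one of
    ; its two scratch registers before the fixed offset is built, so at most
    ; two scratch registers are live at any point.
    $x11 = PseudoReadVLENB
    $x10 = ADDI $x0, 50
    $x11 = MUL killed $x11, killed $x10
    $x10 = LUI 1
    $x10 = ADDIW killed $x10, -1896
    $x10 = ADD $x2, killed $x10
    $x10 = ADD killed $x10, killed $x11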