diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -33,6 +33,10 @@ #define DEBUG_TYPE "riscv-insert-vsetvli" #define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass" +static cl::opt<bool> DisableInsertVSETVLPHIOpt( + "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden, + cl::desc("Disable looking through phis when inserting vsetvlis.")); + namespace { class VSETVLIInfo { @@ -285,6 +289,7 @@ private: bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo); + bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB); void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); @@ -526,6 +531,55 @@ WorkList.push(S); } +// If we weren't able to prove a vsetvli was directly unneeded, it might still +// be unneeded if the AVL is a phi node where all incoming values are VL +// outputs from the last VSETVLI in their respective basic blocks. +bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, + const MachineBasicBlock &MBB) { + if (DisableInsertVSETVLPHIOpt) + return true; + + if (!Require.hasAVLReg()) + return true; + + Register AVLReg = Require.getAVLReg(); + if (!AVLReg.isVirtual()) + return true; + + // We need the AVL to be produced by a PHI node in this basic block. + MachineInstr *PHI = MRI->getVRegDef(AVLReg); + if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB) + return true; + + for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; + PHIOp += 2) { + Register InReg = PHI->getOperand(PHIOp).getReg(); + MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB(); + const BlockData &PBBInfo = BlockInfo[PBB->getNumber()]; + // If the exit from the predecessor has the VTYPE we are looking for + // we might be able to avoid a VSETVLI. 
+ if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require)) + return true; + + // We need the PHI input to be the output of a VSET(I)VLI. + MachineInstr *DefMI = MRI->getVRegDef(InReg); + if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI && + DefMI->getOpcode() != RISCV::PseudoVSETIVLI)) + return true; + + // We found a VSET(I)VLI; make sure it matches the output of the + // predecessor block. + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); + if (!DefInfo.hasSameAVL(PBBInfo.Exit) || + !DefInfo.hasSameVTYPE(PBBInfo.Exit)) + return true; + } + + // If all the incoming values to the PHI checked out, we don't need + // to insert a VSETVLI. + return false; +} + void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { VSETVLIInfo CurInfo; @@ -564,7 +618,8 @@ // use the predecessor information. assert(BlockInfo[MBB.getNumber()].Pred.isValid() && "Expected a valid predecessor state."); - if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) { + if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) && + needVSETVLIPHI(NewInfo, MBB)) { insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred); CurInfo = NewInfo; } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -83,8 +83,6 @@ ret %3 } -; FIXME: The last vsetvli is redundant, but we need to look through a phi to -; prove it. 
define @test3(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry @@ -92,12 +90,11 @@ ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; CHECK-NEXT: vfadd.vv v25, v8, v9 -; CHECK-NEXT: j .LBB2_3 +; CHECK-NEXT: vfmul.vv v8, v25, v8 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_2: # %if.else ; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; CHECK-NEXT: vfsub.vv v25, v8, v9 -; CHECK-NEXT: .LBB2_3: # %if.end -; CHECK-NEXT: vsetvli zero, a0, e64,m1,ta,mu ; CHECK-NEXT: vfmul.vv v8, v25, v8 ; CHECK-NEXT: ret entry: @@ -445,8 +442,6 @@ ret %3 } -; FIXME: The vsetvli in for.body can be removed, it's redundant by its -; predecessors, but we need to look through a PHI to prove it. define void @saxpy_vec(i64 %n, float %a, float* nocapture readonly %x, float* nocapture %y) { ; CHECK-LABEL: saxpy_vec: ; CHECK: # %bb.0: # %entry @@ -456,12 +451,11 @@ ; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: .LBB8_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a4, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: slli a1, a4, 2 ; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: vsetvli zero, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetvli zero, a4, e32,m8,tu,mu ; CHECK-NEXT: vfmacc.vf v16, ft0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vse32.v v16, (a3)