Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1067,7 +1067,6 @@ "Expected a valid predecessor state."); VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred; // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI. - MachineInstr *PrevVSETVLIMI = nullptr; bool IsFirstStateChange = true; for (MachineInstr &MI : MBB) { // If this is an explicit VSETVLI or VSETIVLI, update our state. @@ -1079,7 +1078,6 @@ MI.getOperand(3).setIsDead(false); MI.getOperand(4).setIsDead(false); CurInfo = getInfoForVSETVLI(MI); - PrevVSETVLIMI = &MI; IsFirstStateChange = false; continue; } @@ -1123,42 +1121,16 @@ // register contents are unchanged, the abstract model can change. if (IsFirstStateChange && !needVSETVLIPHI(NewInfo, MBB)) { CurInfo = NewInfo; - PrevVSETVLIMI = nullptr; IsFirstStateChange = false; continue; } - // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it - // with current VL/VTYPE. - bool NeedInsertVSETVLI = true; - if (PrevVSETVLIMI) { - bool HasSameAVL = - CurInfo.hasSameAVL(NewInfo) || - (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() && - NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg()); - // If these two VSETVLI have the same AVL and the same VLMAX, - // we could merge these two VSETVLI. 
- if (HasSameAVL && - CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) { - PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); - NeedInsertVSETVLI = false; - } - if (isScalarMoveInstr(MI) && - ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) || - (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) && - NewInfo.hasSameVLMAX(CurInfo)) { - PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); - NeedInsertVSETVLI = false; - } - } - if (NeedInsertVSETVLI) - insertVSETVLI(MBB, MI, NewInfo, CurInfo); + insertVSETVLI(MBB, MI, NewInfo, CurInfo); LLVM_DEBUG(dbgs() << "Updated to " << NewInfo << " due to " << MI << " (phase3)\n"); CurInfo = NewInfo; IsFirstStateChange = false; } - PrevVSETVLIMI = nullptr; } // If this is something updates VL/VTYPE that we don't know about, set @@ -1167,7 +1139,6 @@ MI.modifiesRegister(RISCV::VTYPE)) { CurInfo = VSETVLIInfo::getUnknown(); IsFirstStateChange = false; - PrevVSETVLIMI = nullptr; } // If we reach the end of the block and our current info doesn't match the @@ -1247,6 +1218,95 @@ for (MachineBasicBlock &MBB : MF) emitVSETVLIs(MBB); + // Now that all vsetvlis are explicit, go through and do block local + // DSE and peephole based demanded fields based transforms. + // TODO: We should probably turn this block and the next into a small + // worklist based combined style mechanism. + MachineInstr *PrevMI = nullptr; + bool UsedVL = false, UsedVTYPE = false; + SmallVector<MachineInstr *> ToDelete; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + // Note: Must be *before* vsetvli handling to account for config cases + // which only change some subfields. 
+ if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL)) + UsedVL = true; + if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE)) + UsedVTYPE = true; + + if (!isVectorConfigInstr(MI)) + continue; + + auto setPrevMI = [&]() { + PrevMI = &MI; + UsedVL = false; + UsedVTYPE = false; + Register VRegDef = MI.getOperand(0).getReg(); + if (VRegDef != RISCV::X0 && !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) + UsedVL = true; + }; + + if (!PrevMI) { + setPrevMI(); + continue; + } + if (!UsedVL && !UsedVTYPE) { + LLVM_DEBUG(dbgs() << "Queue delete (dead): " << PrevMI << "\n"); + ToDelete.push_back(PrevMI); + setPrevMI(); + continue; + } + + if (!UsedVTYPE) { + auto isVLPreserveForm = [](MachineInstr &MI) { + if (MI.getOpcode() == RISCV::PseudoVSETVLIX0) { + Register DestReg = MI.getOperand(0).getReg(); + Register AVLReg = MI.getOperand(1).getReg(); + return DestReg == RISCV::X0 && AVLReg == RISCV::X0; + } + return false; + }; + + if (isVLPreserveForm(MI) || isVLPreserveForm(*PrevMI)) { + setPrevMI(); + continue; + } + + const VSETVLIInfo PrevInfo = getInfoForVSETVLI(*PrevMI); + const VSETVLIInfo NewInfo = getInfoForVSETVLI(MI); + bool HasSameAVL = + PrevInfo.hasSameAVL(NewInfo) || + (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() && + NewInfo.getAVLReg() == PrevMI->getOperand(0).getReg()); + + // If these two VSETVLI have the same AVL and the same VLMAX, + // we could merge these two VSETVLI. + if (HasSameAVL && + PrevInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) { + PrevMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); + LLVM_DEBUG(dbgs() << "Queue delete (same avl - rewrite): " << MI << "\n"); + // TODO: If the current MI's GPR result is used, we can often + // replace it instead of just leaving the otherwise pointless + // VSETVLI around. 
+ auto isVSETVLDefDead = [&](MachineInstr &MI) { + Register VRegDef = MI.getOperand(0).getReg(); + return VRegDef == RISCV::X0 || MI.getOperand(0).isDead() || + (VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)); + }; + if (isVSETVLDefDead(MI)) + ToDelete.push_back(&MI); + else + setPrevMI(); + continue; + } + } + setPrevMI(); + } + } + + for (auto *MI : ToDelete) + MI->eraseFromParent(); + // Once we're fully done rewriting all the instructions, do a final pass // through to check for VSETVLIs which write to an unused destination. // For the non X0, X0 variant, we can replace the destination register Index: llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll +++ llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll @@ -116,7 +116,6 @@ define @repeated_vsetvli(i32 %avl, * %ptr) nounwind { ; CHECK-LABEL: repeated_vsetvli: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll +++ llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll @@ -134,7 +134,6 @@ define @repeated_vsetvli(i64 %avl, * %ptr) nounwind { ; CHECK-LABEL: repeated_vsetvli: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -591,21 +591,20 @@ ; CHECK-NEXT: blez a0, .LBB11_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; 
CHECK-NEXT: li a5, 0 -; CHECK-NEXT: li t1, 0 +; CHECK-NEXT: li t0, 0 ; CHECK-NEXT: slli a7, a6, 3 ; CHECK-NEXT: .LBB11_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add t0, a2, a5 -; CHECK-NEXT: vsetvli zero, a6, e64, m1, ta, mu -; CHECK-NEXT: vle64.v v8, (t0) +; CHECK-NEXT: add a4, a2, a5 +; CHECK-NEXT: vle64.v v8, (a4) ; CHECK-NEXT: add a4, a3, a5 ; CHECK-NEXT: vle64.v v9, (a4) ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: add a4, a1, a5 ; CHECK-NEXT: vse64.v v8, (a4) -; CHECK-NEXT: add t1, t1, a6 +; CHECK-NEXT: add t0, t0, a6 ; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: blt t1, a0, .LBB11_2 +; CHECK-NEXT: blt t0, a0, .LBB11_2 ; CHECK-NEXT: .LBB11_3: # %for.end ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -34,7 +34,8 @@ define @test2(i64 %avl, %a, %b) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret entry: @@ -150,7 +151,7 @@ define @test7( %a, i64 %b, %mask) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, mu ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret entry: @@ -165,7 +166,7 @@ define @test8( %a, i64 %b, %mask) nounwind { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret entry: @@ -196,7 +197,7 @@ define @test10( %a, double %b) nounwind { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 1, 
e64, m1, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: @@ -209,7 +210,7 @@ define @test11( %a, double %b) nounwind { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: @@ -281,10 +282,11 @@ define @test15(i64 %avl, %a, %b) nounwind { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret entry: %vsetvli = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 7) @@ -354,7 +356,8 @@ define @test18( %a, double %b) nounwind { ; CHECK-LABEL: test18: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli a0, 6, e64, m1, tu, mu +; CHECK-NEXT: vsetivli a0, 6, e64, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -382,7 +385,7 @@ define @test19( %a, double %b) nounwind { ; CHECK-LABEL: test19: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu