diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -462,6 +462,7 @@
   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
   void emitVSETVLIs(MachineBasicBlock &MBB);
   void doLocalPrepass(MachineBasicBlock &MBB);
+  void doLocalPostpass(MachineBasicBlock &MBB);
   void doPRE(MachineBasicBlock &MBB);
 };

@@ -478,6 +479,15 @@
          MI.getOpcode() == RISCV::PseudoVSETIVLI;
 }

+/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
+/// VL and only sets VTYPE.
+static bool isVLPreservingConfig(const MachineInstr &MI) {
+  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
+    return false;
+  assert(RISCV::X0 == MI.getOperand(1).getReg());
+  return RISCV::X0 == MI.getOperand(0).getReg();
+}
+
 static MachineInstr *elideCopies(MachineInstr *MI,
                                  const MachineRegisterInfo *MRI) {
   while (true) {
@@ -1065,9 +1075,6 @@
 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
   VSETVLIInfo CurInfo;

-  // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
-  MachineInstr *PrevVSETVLIMI = nullptr;
-
   for (MachineInstr &MI : MBB) {
     // If this is an explicit VSETVLI or VSETIVLI, update our state.
     if (isVectorConfigInstr(MI)) {
@@ -1078,7 +1085,6 @@
       MI.getOperand(3).setIsDead(false);
       MI.getOperand(4).setIsDead(false);
       CurInfo = getInfoForVSETVLI(MI);
-      PrevVSETVLIMI = &MI;
       continue;
     }

@@ -1125,29 +1131,10 @@
         // vl/vtype for succesor blocks.
         if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
             needVSETVLI(NewInfo, CurInfo)) {
-          // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it
-          // with current VL/VTYPE.
-          bool NeedInsertVSETVLI = true;
-          if (PrevVSETVLIMI) {
-            // If these two VSETVLI have the same AVL and the same VLMAX,
-            // we could merge these two VSETVLI.
-            // TODO: If we remove this, we get a `vsetvli x0, x0, vtype'
-            // here.  We could simply let this be emitted, then remove
-            // the unused vsetvlis in a post-pass.
-            if (CurInfo.hasSameAVL(NewInfo) && CurInfo.hasSameVLMAX(NewInfo)) {
-              // WARNING: For correctness, it is essential the contents of VL
-              // and VTYPE stay the same after MI.  This greatly limits the
-              // mutation we can legally do here.
-              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-              NeedInsertVSETVLI = false;
-            }
-          }
-          if (NeedInsertVSETVLI)
-            insertVSETVLI(MBB, MI, NewInfo, CurInfo);
+          insertVSETVLI(MBB, MI, NewInfo, CurInfo);
           CurInfo = NewInfo;
         }
       }
-      PrevVSETVLIMI = nullptr;
     }

     // If this is something that updates VL/VTYPE that we don't know about, set
@@ -1155,7 +1142,6 @@
     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
       CurInfo = VSETVLIInfo::getUnknown();
-      PrevVSETVLIMI = nullptr;
     }
   }

@@ -1378,6 +1364,52 @@
                 AvailableInfo, OldInfo);
 }

+void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
+  MachineInstr *PrevMI = nullptr;
+  bool UsedVL = false, UsedVTYPE = false;
+  SmallVector<MachineInstr *> ToDelete;
+  for (MachineInstr &MI : MBB) {
+    // Note: Must be *before* vsetvli handling to account for config cases
+    // which only change some subfields.
+    if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
+      UsedVL = true;
+    if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
+      UsedVTYPE = true;
+
+    if (!isVectorConfigInstr(MI))
+      continue;
+
+    if (PrevMI) {
+      if (!UsedVL && !UsedVTYPE) {
+        ToDelete.push_back(PrevMI);
+        // fallthrough
+      } else if (!UsedVTYPE && isVLPreservingConfig(MI)) {
+        // Note: `vsetvli x0, x0, vtype' is the canonical instruction
+        // for this case.  If you find yourself wanting to add other forms
+        // to this "unused VTYPE" case, we're probably missing a
+        // canonicalization earlier.
+        // Note: We don't need to explicitly check vtype compatibility
+        // here because this form is only legal (per ISA) when not
+        // changing VL.
+        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
+        ToDelete.push_back(&MI);
+        // Leave PrevMI unchanged
+        continue;
+      }
+    }
+    PrevMI = &MI;
+    UsedVL = false;
+    UsedVTYPE = false;
+    Register VRegDef = MI.getOperand(0).getReg();
+    if (VRegDef != RISCV::X0 &&
+        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+      UsedVL = true;
+  }
+
+  for (auto *MI : ToDelete)
+    MI->eraseFromParent();
+}
+
 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   // Skip if the vector extension is not enabled.
   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
@@ -1443,6 +1475,15 @@
   for (MachineBasicBlock &MBB : MF)
     emitVSETVLIs(MBB);

+  // Now that all vsetvlis are explicit, go through and do block local
+  // DSE and peephole based demanded-fields transforms.  Note that
+  // this *must* be done outside the main dataflow so long as we allow
+  // any cross block analysis within the dataflow.  We can't have both
+  // demanded fields based mutation and non-local analysis in the
+  // dataflow at the same time without introducing inconsistencies.
+  for (MachineBasicBlock &MBB : MF)
+    doLocalPostpass(MBB);
+
   // Once we're fully done rewriting all the instructions, do a final pass
   // through to check for VSETVLIs which write to an unused destination.
   // For the non X0, X0 variant, we can replace the destination register
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -747,7 +747,6 @@
 ; CHECK-LABEL: vector_init_vsetvli_fv2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
 ; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:  .LBB15_1: # %for.body
@@ -812,14 +811,13 @@
 ; Demonstrates a case where mutation in phase3 is problematic.  We mutate the
 ; vsetvli without considering that it changes the compatibility result of the
 ; vadd in the second block.
-; FIXME: This currently crashes with strict asserts enabled.
 define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
 ; CHECK-LABEL: cross_block_mutate:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli a0, 6, e32, m2, tu, mu
 ; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    ret
                                               <vscale x 4 x i1> %mask) {
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -328,8 +328,7 @@
 define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test17:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
@@ -468,7 +467,6 @@
 define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
 ; CHECK-LABEL: avl_forward4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -498,7 +496,6 @@
 define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
 ; CHECK-LABEL: vleNff:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
@@ -526,7 +523,6 @@
 ; CHECK-LABEL: avl_forward5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    ret
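
A minimal before/after sketch of what doLocalPostpass does, mirroring the avl_forward5 diff above (illustration only, not part of the patch); the instruction sequence is assumed, not taken from a real function body:

    ; after emitVSETVLIs, before doLocalPostpass
    vsetvli zero, a1, e8, m4, ta, mu
    vsetvli zero, a1, e32, m1, ta, mu
    vle32.v v8, (a0)

    ; after doLocalPostpass: neither VL nor VTYPE written by the first
    ; config is read before the second one, so the first vsetvli is deleted
    vsetvli zero, a1, e32, m1, ta, mu
    vle32.v v8, (a0)

When only VTYPE is unused and the later instruction is the VL-preserving `vsetvli x0, x0, vtype' form, the pass instead folds the new VTYPE immediate into the earlier vsetvli and deletes the later one.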