diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -484,6 +484,7 @@
   bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
   void emitVSETVLIs(MachineBasicBlock &MBB);
+  void doLocalPrepass(MachineBasicBlock &MBB);
 };

 } // end anonymous namespace
@@ -990,9 +991,8 @@
     // If this is something that updates VL/VTYPE that we don't know about, set
     // the state to unknown.
     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
-        MI.modifiesRegister(RISCV::VTYPE)) {
+        MI.modifiesRegister(RISCV::VTYPE))
       BBInfo.Change = VSETVLIInfo::getUnknown();
-    }
   }

   // Initial exit state is whatever change we found in the block.
@@ -1170,13 +1170,6 @@
           PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
           NeedInsertVSETVLI = false;
         }
-        if (isScalarMoveInstr(MI) &&
-            ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
-             (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
-            NewInfo.hasSameVLMAX(CurInfo)) {
-          PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-          NeedInsertVSETVLI = false;
-        }
       }
       if (NeedInsertVSETVLI)
         insertVSETVLI(MBB, MI, NewInfo, CurInfo);
@@ -1186,7 +1179,7 @@
       PrevVSETVLIMI = nullptr;
     }

-    // If this is something updates VL/VTYPE that we don't know about, set
+    // If this is something that updates VL/VTYPE that we don't know about, set
     // the state to unknown.
     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
@@ -1219,6 +1212,54 @@
   }
 }

+void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) {
+  VSETVLIInfo CurInfo = VSETVLIInfo::getUnknown();
+  for (MachineInstr &MI : MBB) {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (isVectorConfigInstr(MI)) {
+      CurInfo = getInfoForVSETVLI(MI);
+      continue;
+    }
+
+    const uint64_t TSFlags = MI.getDesc().TSFlags;
+    if (isScalarMoveInstr(MI)) {
+      assert(RISCVII::hasSEWOp(TSFlags) && RISCVII::hasVLOp(TSFlags));
+      const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+
+      // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and
+      // VL > 0. We can discard the user-requested AVL and just reuse the
+      // last one, provided we can prove both are zero or both are non-zero.
+      // This removes a vsetvli entirely if the types match, or allows use
+      // of the cheaper AVL-preserving variant if VLMAX doesn't change. If
+      // VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
+      // variant, so we avoid the transform to prevent extending the live
+      // range of an AVL register operand.
+      // TODO: We can probably relax this for immediates.
+      if (((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
+           (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
+          NewInfo.hasSameVLMAX(CurInfo)) {
+        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+        if (CurInfo.hasAVLImm())
+          VLOp.ChangeToImmediate(CurInfo.getAVLImm());
+        else
+          VLOp.ChangeToRegister(CurInfo.getAVLReg(), /*IsDef*/ false);
+        CurInfo = computeInfoForInstr(MI, TSFlags, MRI);
+        continue;
+      }
+    }
+
+    if (RISCVII::hasSEWOp(TSFlags)) {
+      CurInfo = computeInfoForInstr(MI, TSFlags, MRI);
+      continue;
+    }
+
+    // If this is something that updates VL/VTYPE that we don't know about,
+    // set the state to unknown.
+    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
+        MI.modifiesRegister(RISCV::VTYPE))
+      CurInfo = VSETVLIInfo::getUnknown();
+  }
+}
+
 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   // Skip if the vector extension is not enabled.
   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
@@ -1233,6 +1274,14 @@
   assert(BlockInfo.empty() && "Expect empty block infos");
   BlockInfo.resize(MF.getNumBlockIDs());

+  // Scan each block locally for cases where we can mutate the operands
+  // of the instructions to reduce state transitions. Critically, this
+  // must be done before we start propagating data flow states, as these
+  // transforms are allowed to change the contents of VTYPE and VL as
+  // long as the semantics of the program stay the same.
+  for (MachineBasicBlock &MBB : MF)
+    doLocalPrepass(MBB);
+
   bool HaveVectorOp = false;

   // Phase 1 - determine how VL/VTYPE are affected by each block.
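To make the effect concrete before the test churn below, here is a sketch distilled from the fixed-vectors-fp-interleave.ll update (the instructions are taken from the test; the annotations and the VLMAX arithmetic, VLMAX = VLEN x LMUL / SEW, are editorial):

    ; Before: a fresh VL computation just to feed a scalar move.
    vsetvli  zero, zero, e64, m1, ta, mu  ; VL already non-zero, VLMAX = VLEN/64
    li       a0, 10
    vsetivli zero, 1, e8, mf8, ta, mu     ; requested AVL = 1, also non-zero;
                                          ; e8/mf8 keeps VLMAX = VLEN/64
    vmv.s.x  v0, a0

    ; After the prepass: vmv.s.x inherits the in-flight AVL, so
    ; emitVSETVLIs can use the VL-preserving 'x0, x0' form instead.
    vsetvli  zero, zero, e8, mf8, ta, mu
    vmv.s.x  v0, a0

The test updates below all follow this shape: a 'vsetivli zero, 1, ...' in front of vmv.s.x or vfmv.s.f degrades to 'vsetvli zero, zero, ...' because the surrounding configuration already guarantees a non-zero VL with the same VLMAX.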
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -179,7 +179,7 @@
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v10, v10, a4
 ; RV32-NEXT:    li a5, 5
-; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; RV32-NEXT:    vmv.s.x v0, a5
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; RV32-NEXT:    vmv.v.i v11, 0
@@ -871,7 +871,7 @@
 ; LMULMAX1-RV32-NEXT:    lui a5, 4080
 ; LMULMAX1-RV32-NEXT:    vand.vx v11, v9, a5
 ; LMULMAX1-RV32-NEXT:    li a6, 5
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a6
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -85,7 +85,7 @@
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v10, v10, a4
 ; RV32-NEXT:    li a5, 5
-; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; RV32-NEXT:    vmv.s.x v0, a5
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; RV32-NEXT:    vmv.v.i v11, 0
@@ -447,7 +447,7 @@
 ; LMULMAX1-RV32-NEXT:    lui a5, 4080
 ; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a5
 ; LMULMAX1-RV32-NEXT:    li a6, 5
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a6
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.v.i v12, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -77,9 +77,9 @@
 ; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
 ; RV32-V512-NEXT:    li a0, 10
-; RV32-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V512-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; RV32-V512-NEXT:    vmv.s.x v0, a0
-; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
 ; RV32-V512-NEXT:    vmv.v.v v8, v10
 ; RV32-V512-NEXT:    ret
@@ -91,9 +91,9 @@
 ; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
 ; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
 ; RV64-V512-NEXT:    li a0, 10
-; RV64-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V512-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; RV64-V512-NEXT:    vmv.s.x v0, a0
-; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
 ; RV64-V512-NEXT:    vmv.v.v v8, v10
 ; RV64-V512-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -90,9 +90,9 @@
 ; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
 ; RV32-V512-NEXT:    li a0, 10
-; RV32-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V512-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; RV32-V512-NEXT:    vmv.s.x v0, a0
-; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
 ; RV32-V512-NEXT:    vmv.v.v v8, v10
 ; RV32-V512-NEXT:    ret
@@ -104,9 +104,9 @@
 ; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
 ; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
 ; RV64-V512-NEXT:    li a0, 10
-; RV64-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V512-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; RV64-V512-NEXT:    vmv.s.x v0, a0
-; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
 ; RV64-V512-NEXT:    vmv.v.v v8, v10
 ; RV64-V512-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -748,7 +748,7 @@
 ; LMULMAX1-RV32-NEXT:    addi a0, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle64.v v15, (a0)
 ; LMULMAX1-RV32-NEXT:    li a0, 5
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a0
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.v.x v16, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -433,9 +433,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -451,9 +451,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -865,9 +865,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -883,9 +883,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
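The widening-reduction tests (above for FP, below for integer) show why the VLMAX condition holds even though SEW changes: the scalar accumulator is materialized at 2x SEW with LMUL also doubled, so the SEW/LMUL ratio, and hence VLMAX, is unchanged. Annotated excerpt from the updated checks (annotations editorial):

    vsetivli zero, 4, e16, mf2, ta, mu    ; VL = 4, VLMAX = VLEN/32
    vle16.v  v8, (a0)
    vsetvli  zero, zero, e32, m1, ta, mu  ; accumulator at 2x SEW: e32/m1 also has VLMAX = VLEN/32
    vfmv.s.f v9, fa0                      ; scalar move: only VL = 0 vs VL > 0 matters
    vsetvli  zero, zero, e16, mf2, ta, mu
    vfwredusum.vs v8, v8, v9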
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -328,9 +328,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -346,9 +346,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -710,9 +710,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -728,9 +728,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -1082,9 +1082,9 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vmv.s.x v9, zero
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32-NEXT:    vwredsum.vs v8, v8, v9
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vmv.x.s a0, v8
@@ -1098,9 +1098,9 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vmv.s.x v9, zero
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64-NEXT:    vwredsum.vs v8, v8, v9
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1116,9 +1116,9 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vmv.s.x v9, zero
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32-NEXT:    vwredsumu.vs v8, v8, v9
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vmv.x.s a0, v8
@@ -1132,9 +1132,9 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vmv.s.x v9, zero
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64-NEXT:    vwredsumu.vs v8, v8, v9
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vmv.x.s a0, v8
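For reference, the property the prepass relies on, as stated in the patch comment (editorial illustration; register choices are arbitrary): vmv.s.x and vfmv.s.f have only two behaviors, so every non-zero AVL is interchangeable for them.

    vsetivli zero, 0, e32, m1, ta, mu  ; VL = 0
    vmv.s.x  v8, a0                    ; VL = 0 case: element 0 is not written
    vsetivli zero, 7, e32, m1, ta, mu  ; any VL > 0 behaves identically here
    vmv.s.x  v8, a0                    ; VL > 0 case: a0 is written to element 0 of v8

This is why only the zero/non-zero property of the AVL, together with an unchanged VLMAX, has to be proven before the operand substitution.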