diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -75,7 +75,17 @@
   return RVV->BaseInstr;
 }

-static bool isScalarMoveInstr(const MachineInstr &MI) {
+static bool isScalarExtractInstr(const MachineInstr &MI) {
+  switch (getRVVMCOpcode(MI.getOpcode())) {
+  default:
+    return false;
+  case RISCV::VMV_X_S:
+  case RISCV::VFMV_F_S:
+    return true;
+  }
+}
+
+static bool isScalarInsertInstr(const MachineInstr &MI) {
   switch (getRVVMCOpcode(MI.getOpcode())) {
   default:
     return false;
@@ -358,7 +368,7 @@
   }

   // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
-  if (isScalarMoveInstr(MI)) {
+  if (isScalarInsertInstr(MI)) {
     Res.LMUL = false;
     Res.SEWLMULRatio = false;
     Res.VLAny = false;
@@ -374,6 +384,15 @@
     }
   }

+  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
+  if (isScalarExtractInstr(MI)) {
+    assert(!RISCVII::hasVLOp(TSFlags));
+    Res.LMUL = false;
+    Res.SEWLMULRatio = false;
+    Res.TailPolicy = false;
+    Res.MaskPolicy = false;
+  }
+
   return Res;
 }

@@ -549,12 +568,6 @@
   if (SEWLMULRatioOnly)
     return false;

-  // If the instruction doesn't need an AVLReg and the SEW matches, consider
-  // it compatible.
-  if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
-    if (SEW == Require.SEW)
-      return true;
-
   if (Used.VLAny && !hasSameAVL(Require))
     return false;

@@ -783,6 +796,7 @@
       InstrInfo.setAVLReg(VLOp.getReg());
     }
   } else {
+    assert(isScalarExtractInstr(MI));
     InstrInfo.setAVLReg(RISCV::NoRegister);
   }
 #ifndef NDEBUG
@@ -999,7 +1013,7 @@
   // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
   // prevent extending live range of an avl register operand.
   // TODO: We can probably relax this for immediates.
-  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
+  if (isScalarInsertInstr(MI) && PrevInfo.isValid() &&
       PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
       Info.hasSameVLMAX(PrevInfo)) {
     if (PrevInfo.hasAVLImm())
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -797,11 +797,10 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB12_8
 ; RV64ZVE32F-NEXT:  .LBB12_14: # %cond.load10
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -1964,12 +1963,11 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB26_8
 ; RV64ZVE32F-NEXT:  .LBB26_14: # %cond.load10
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -3501,12 +3499,11 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB41_7
 ; RV64ZVE32F-NEXT:  .LBB41_14: # %cond.load10
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -7901,12 +7898,11 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB67_8
 ; RV64ZVE32F-NEXT:  .LBB67_14: # %cond.load10
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flh fa5, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -9312,12 +9308,11 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB80_7
 ; RV64ZVE32F-NEXT:  .LBB80_14: # %cond.load10
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -12272,11 +12267,10 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB97_8
 ; RV64ZVE32F-NEXT:  .LBB97_27: # %cond.load10
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -12303,11 +12297,10 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 256
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB97_13
 ; RV64ZVE32F-NEXT:  .LBB97_30: # %cond.load22
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 8
 ; RV64ZVE32F-NEXT:    andi a2, a1, 512
@@ -12325,11 +12318,10 @@
 ; RV64ZVE32F-NEXT:    slli a2, a1, 51
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB97_19
 ; RV64ZVE32F-NEXT:  .LBB97_32: # %cond.load34
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 13, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 13, e8, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 12
 ; RV64ZVE32F-NEXT:    slli a2, a1, 50
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -582,10 +582,9 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB9_8
 ; RV64ZVE32F-NEXT:  .LBB9_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
 ; RV64ZVE32F-NEXT:    vse8.v v10, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -1537,11 +1536,10 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB21_8
 ; RV64ZVE32F-NEXT:  .LBB21_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -2843,11 +2841,10 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB35_7
 ; RV64ZVE32F-NEXT:  .LBB35_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v10, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -6790,11 +6787,10 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB61_8
 ; RV64ZVE32F-NEXT:  .LBB61_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -8042,11 +8038,10 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB74_7
 ; RV64ZVE32F-NEXT:  .LBB74_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v10, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -10769,10 +10764,9 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB91_8
 ; RV64ZVE32F-NEXT:  .LBB91_27: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 4
 ; RV64ZVE32F-NEXT:    vse8.v v11, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -10797,10 +10791,9 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 256
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB91_13
 ; RV64ZVE32F-NEXT:  .LBB91_30: # %cond.store15
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 8
 ; RV64ZVE32F-NEXT:    vse8.v v10, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 512
@@ -10817,10 +10810,9 @@
 ; RV64ZVE32F-NEXT:    slli a2, a1, 51
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB91_19
 ; RV64ZVE32F-NEXT:  .LBB91_32: # %cond.store23
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 12
 ; RV64ZVE32F-NEXT:    vse8.v v10, (a2)
 ; RV64ZVE32F-NEXT:    slli a2, a1, 50
@@ -11125,10 +11117,9 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB92_8
 ; RV64ZVE32F-NEXT:  .LBB92_51: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v13
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v14, v8, 4
 ; RV64ZVE32F-NEXT:    vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -11153,10 +11144,9 @@
 ; RV64ZVE32F-NEXT:    andi a2, a1, 256
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB92_13
 ; RV64ZVE32F-NEXT:  .LBB92_54: # %cond.store15
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v14, v8, 8
 ; RV64ZVE32F-NEXT:    vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 512
@@ -11181,10 +11171,9 @@
 ; RV64ZVE32F-NEXT:    slli a2, a1, 47
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB92_26
 ; RV64ZVE32F-NEXT:  .LBB92_57: # %cond.store31
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 16
 ; RV64ZVE32F-NEXT:    vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT:    slli a2, a1, 46
@@ -11201,10 +11190,9 @@
 ; RV64ZVE32F-NEXT:    slli a2, a1, 43
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB92_32
 ; RV64ZVE32F-NEXT:  .LBB92_59: # %cond.store39
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 20
 ; RV64ZVE32F-NEXT:    vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT:    slli a2, a1, 42
@@ -11229,10 +11217,9 @@
 ; RV64ZVE32F-NEXT:    slli a2, a1, 39
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB92_37
 ; RV64ZVE32F-NEXT:  .LBB92_62: # %cond.store47
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 24
 ; RV64ZVE32F-NEXT:    vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT:    slli a2, a1, 38
@@ -11249,10 +11236,9 @@
 ; RV64ZVE32F-NEXT:    slli a2, a1, 35
 ; RV64ZVE32F-NEXT:    bgez a2, .LBB92_43
 ; RV64ZVE32F-NEXT:  .LBB92_64: # %cond.store55
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 28
 ; RV64ZVE32F-NEXT:    vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT:    slli a2, a1, 34
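
For context, the sketch below is a minimal, self-contained illustration of the demanded-fields idea the patch applies to vmv.x.s/vfmv.f.s; it is not part of the patch, and the names (VConfig, Demanded, needsNewVSETVLI, demandedByScalarExtract) are hypothetical rather than the actual RISCVInsertVSETVLI API. It only demonstrates the effect visible in the test diffs above: once a scalar extract demands nothing but SEW, a neighbouring vsetvli whose SEW already matches makes a separate vl/vtype toggle unnecessary.

// Hypothetical sketch, not LLVM code: a SEW-only consumer such as vmv.x.s can
// reuse the vector configuration installed by an earlier vsetvli/vsetivli.
#include <cstdio>

// Simplified vector-unit state, loosely mirroring vl/vtype.
struct VConfig {
  unsigned VL;        // active vector length
  unsigned SEW;       // element width in bits
  unsigned LMUL8;     // LMUL scaled by 8 (mf4 == 2, mf2 == 4, m1 == 8, m2 == 16)
  bool TailAgnostic;
  bool MaskAgnostic;
};

// Which pieces of the configuration an instruction actually observes.
struct Demanded {
  bool VL = true;
  bool SEW = true;
  bool LMUL = true;
  bool TailPolicy = true;
  bool MaskPolicy = true;
};

// A scalar extract (vmv.x.s / vfmv.f.s) reads element 0 regardless of VL,
// LMUL, or the tail/mask policies, so only SEW remains demanded.
static Demanded demandedByScalarExtract() {
  Demanded D;
  D.VL = false;
  D.LMUL = false;
  D.TailPolicy = false;
  D.MaskPolicy = false;
  return D;
}

// The incoming state satisfies the requirement if every demanded field
// matches; undemanded fields are free to differ.
static bool needsNewVSETVLI(const VConfig &Cur, const VConfig &Req,
                            const Demanded &D) {
  if (D.VL && Cur.VL != Req.VL) return true;
  if (D.SEW && Cur.SEW != Req.SEW) return true;
  if (D.LMUL && Cur.LMUL8 != Req.LMUL8) return true;
  if (D.TailPolicy && Cur.TailAgnostic != Req.TailAgnostic) return true;
  if (D.MaskPolicy && Cur.MaskAgnostic != Req.MaskAgnostic) return true;
  return false;
}

int main() {
  // State needed by the surrounding code, e.g. "vsetivli zero, 5, e8, m1, tu, ma".
  VConfig Prev{5, 8, 8, false, true};
  // What a vmv.x.s on an e8 vector would nominally request
  // ("vsetivli zero, 1, e8, mf4, ta, ma" in the old test output).
  VConfig Want{1, 8, 2, true, true};

  bool NeedsToggle = needsNewVSETVLI(Prev, Want, demandedByScalarExtract());
  // Prints "no extra vsetvli needed": SEW matches and nothing else is demanded.
  std::printf("%s\n", NeedsToggle ? "extra vsetvli needed" : "no extra vsetvli needed");
  return 0;
}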