Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -75,7 +75,17 @@ return RVV->BaseInstr; } -static bool isScalarMoveInstr(const MachineInstr &MI) { +static bool isScalarExtractInstr(const MachineInstr &MI) { + switch (getRVVMCOpcode(MI.getOpcode())) { + default: + return false; + case RISCV::VMV_X_S: + case RISCV::VFMV_F_S: + return true; + } +} + +static bool isScalarInsertInstr(const MachineInstr &MI) { switch (getRVVMCOpcode(MI.getOpcode())) { default: return false; @@ -358,7 +368,7 @@ } // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0. - if (isScalarMoveInstr(MI)) { + if (isScalarInsertInstr(MI)) { Res.LMUL = false; Res.SEWLMULRatio = false; Res.VLAny = false; @@ -374,6 +384,14 @@ } } + // vmv.x.s, and vfmv.f.s are unconditional and ignore VL and LMUL + if (isScalarExtractInstr(MI)) { + Res.LMUL = false; + Res.SEWLMULRatio = false; + Res.VLAny = false; + Res.VLZeroness = false; + } + return Res; } @@ -999,7 +1017,7 @@ // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to // prevent extending live range of an avl register operand. // TODO: We can probably relax this for immediates. 
- if (isScalarMoveInstr(MI) && PrevInfo.isValid() && + if (isScalarInsertInstr(MI) && PrevInfo.isValid() && PrevInfo.hasEquallyZeroAVL(Info, *MRI) && Info.hasSameVLMAX(PrevInfo)) { if (PrevInfo.hasAVLImm()) Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -582,10 +582,9 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_8 ; RV64ZVE32F-NEXT: .LBB9_14: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -1537,11 +1536,10 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_8 ; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -2843,11 +2841,10 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ 
-6790,11 +6787,10 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_8 ; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -8042,11 +8038,10 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -10769,10 +10764,9 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_8 ; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -10797,10 +10791,9 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 256 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_13 ; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store15 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: 
andi a2, a1, 512 @@ -10817,10 +10810,9 @@ ; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_19 ; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 50 @@ -11125,10 +11117,9 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_8 ; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -11153,10 +11144,9 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 256 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_13 ; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store15 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 8 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 512 @@ -11181,10 +11171,9 @@ ; RV64ZVE32F-NEXT: slli a2, a1, 47 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_26 ; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store31 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 46 @@ -11201,10 +11190,9 @@ ; 
RV64ZVE32F-NEXT: slli a2, a1, 43 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_32 ; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store39 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 42 @@ -11229,10 +11217,9 @@ ; RV64ZVE32F-NEXT: slli a2, a1, 39 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_37 ; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store47 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 38 @@ -11249,10 +11236,9 @@ ; RV64ZVE32F-NEXT: slli a2, a1, 35 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_43 ; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 34