diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -59,12 +59,13 @@
   uint8_t MaskAgnostic : 1;
   uint8_t MaskRegOp : 1;
   uint8_t StoreOp : 1;
+  uint8_t ScalarMovOp : 1;
   uint8_t SEWLMULRatioOnly : 1;
 
 public:
   VSETVLIInfo()
       : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
-        StoreOp(false), SEWLMULRatioOnly(false) {}
+        StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {}
 
   static VSETVLIInfo getUnknown() {
     VSETVLIInfo Info;
@@ -96,6 +97,18 @@
     assert(hasAVLImm());
     return AVLImm;
   }
+  bool hasZeroAVL() const {
+    if (hasAVLImm())
+      return getAVLImm() == 0;
+    return false;
+  }
+  bool hasNonZeroAVL() const {
+    if (hasAVLImm())
+      return getAVLImm() > 0;
+    if (hasAVLReg())
+      return getAVLReg() == RISCV::X0;
+    return false;
+  }
 
   bool hasSameAVL(const VSETVLIInfo &Other) const {
     assert(isValid() && Other.isValid() &&
@@ -120,7 +133,7 @@
     MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
   }
   void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
-                bool IsStore) {
+                bool IsStore, bool IsScalarMovOp) {
     assert(isValid() && !isUnknown() &&
            "Can't set VTYPE for uninitialized or unknown");
     VLMul = L;
@@ -129,6 +142,7 @@
     MaskAgnostic = MA;
     MaskRegOp = MRO;
     StoreOp = IsStore;
+    ScalarMovOp = IsScalarMovOp;
   }
 
   unsigned encodeVTYPE() const {
@@ -139,6 +153,16 @@
 
   bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
 
+  bool hasSameSEW(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!isUnknown() && !Other.isUnknown() &&
+           "Can't compare VTYPE in unknown state");
+    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
+           "Can't compare when only LMUL/SEW ratio is valid.");
+    return SEW == Other.SEW;
+  }
+
   bool hasSameVTYPE(const VSETVLIInfo &Other) const {
     assert(isValid() && Other.isValid() &&
            "Can't compare invalid VSETVLIInfos");
@@ -178,6 +202,15 @@
     return getSEWLMULRatio() == Other.getSEWLMULRatio();
   }
 
+  bool hasSamePolicy(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!isUnknown() && !Other.isUnknown() &&
+           "Can't compare VTYPE in unknown state");
+    return TailAgnostic == Other.TailAgnostic &&
+           MaskAgnostic == Other.MaskAgnostic;
+  }
+
   bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
     // Simple case, see if full VTYPE matches.
     if (hasSameVTYPE(InstrInfo))
@@ -222,6 +255,15 @@
       return true;
     }
 
+    // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and
+    // VL > 0. So the two states are compatible whenever we can prove that
+    // both VLs fall into the same case.
+    if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() &&
+        ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) ||
+         (hasZeroAVL() && InstrInfo.hasZeroAVL())) &&
+        hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo))
+      return true;
+
     // The AVL must match.
     if (!hasSameAVL(InstrInfo))
       return false;
@@ -414,6 +456,42 @@
   }
 }
 
+static bool isScalarMoveInstr(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case RISCV::PseudoVMV_S_X_M1:
+  case RISCV::PseudoVMV_S_X_M2:
+  case RISCV::PseudoVMV_S_X_M4:
+  case RISCV::PseudoVMV_S_X_M8:
+  case RISCV::PseudoVMV_S_X_MF2:
+  case RISCV::PseudoVMV_S_X_MF4:
+  case RISCV::PseudoVMV_S_X_MF8:
+  case RISCV::PseudoVFMV_F16_S_M1:
+  case RISCV::PseudoVFMV_F16_S_M2:
+  case RISCV::PseudoVFMV_F16_S_M4:
+  case RISCV::PseudoVFMV_F16_S_M8:
+  case RISCV::PseudoVFMV_F16_S_MF2:
+  case RISCV::PseudoVFMV_F16_S_MF4:
+  case RISCV::PseudoVFMV_F16_S_MF8:
+  case RISCV::PseudoVFMV_F32_S_M1:
+  case RISCV::PseudoVFMV_F32_S_M2:
+  case RISCV::PseudoVFMV_F32_S_M4:
+  case RISCV::PseudoVFMV_F32_S_M8:
+  case RISCV::PseudoVFMV_F32_S_MF2:
+  case RISCV::PseudoVFMV_F32_S_MF4:
+  case RISCV::PseudoVFMV_F32_S_MF8:
+  case RISCV::PseudoVFMV_F64_S_M1:
+  case RISCV::PseudoVFMV_F64_S_M2:
+  case RISCV::PseudoVFMV_F64_S_M4:
+  case RISCV::PseudoVFMV_F64_S_M8:
+  case RISCV::PseudoVFMV_F64_S_MF2:
+  case RISCV::PseudoVFMV_F64_S_MF4:
+  case RISCV::PseudoVFMV_F64_S_MF8:
+    return true;
+  }
+}
+
 static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                        const MachineRegisterInfo *MRI) {
   VSETVLIInfo InstrInfo;
@@ -461,6 +539,7 @@
   // If there are no explicit defs, this is a store instruction which can
   // ignore the tail and mask policies.
   bool StoreOp = MI.getNumExplicitDefs() == 0;
+  bool ScalarMovOp = isScalarMoveInstr(MI);
 
   if (RISCVII::hasVLOp(TSFlags)) {
     const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
@@ -477,7 +556,7 @@
   } else
     InstrInfo.setAVLReg(RISCV::NoRegister);
   InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
-                     /*MaskAgnostic*/ false, MaskRegOp, StoreOp);
+                     /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp);
 
   return InstrInfo;
 }
@@ -1000,6 +1079,13 @@
           PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
           NeedInsertVSETVLI = false;
         }
+        if (isScalarMoveInstr(MI) &&
+            ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
+             (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
+            NewInfo.hasSameVLMAX(CurInfo)) {
+          PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
+          NeedInsertVSETVLI = false;
+        }
       }
       if (NeedInsertVSETVLI)
         insertVSETVLI(MBB, MI, NewInfo, CurInfo);
diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
--- a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
@@ -27,12 +27,10 @@
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
 ; CHECK-NEXT: vslideup.vi v12, v8, 8
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vrgather.vv v8, v20, v16
 ; CHECK-NEXT: lui a0, 11
 ; CHECK-NEXT: addiw a0, a0, -1366
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v20, v16
 ; CHECK-NEXT: vrgather.vv v8, v12, v18, v0.t
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -513,13 +513,12 @@
 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32-NEXT: vmv.s.x v0, a1
 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: vmv.v.i v8, 2
-; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: vmv.v.i v9, 2
 ; RV32-NEXT: li a1, 36
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: vmerge.vim v8, v8, 3, v0
+; RV32-NEXT: vmv.s.x v8, a1
+; RV32-NEXT: vmerge.vim v9, v9, 1, v0
+; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vmerge.vim v8, v9, 3, v0
 ; RV32-NEXT: vse8.v v8, (a0)
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -319,9 +319,7 @@
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
 ; CHECK-NEXT: vrgather.vi v9, v8, 1
 ; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
 ; CHECK-NEXT: vid.v v8
 ; CHECK-NEXT: vsrl.vi v10, v8, 1
 ; CHECK-NEXT: vmv.v.x v8, a0
@@ -401,11 +399,9 @@
 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, mu
 ; CHECK-NEXT: vmv.s.x v11, a0
 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vv v10, v8, v11
 ; CHECK-NEXT: li a0, 66
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vv v10, v8, v11
 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
@@ -439,11 +435,9 @@
 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; RV32-NEXT: vmv.v.x v11, a0
 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: vrgather.vv v10, v8, v11
 ; RV32-NEXT: li a0, 66
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: vrgather.vv v10, v8, v11
 ; RV32-NEXT: vrgather.vi v10, v9, 0, v0.t
 ; RV32-NEXT: vmv1r.v v8, v10
 ; RV32-NEXT: ret
@@ -455,11 +449,9 @@
 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; RV64-NEXT: vmv.v.x v11, a0
 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT: vrgather.vv v10, v8, v11
 ; RV64-NEXT: li a0, 66
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
 ; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT: vrgather.vv v10, v8, v11
 ; RV64-NEXT: vrgather.vi v10, v9, 0, v0.t
 ; RV64-NEXT: vmv1r.v v8, v10
 ; RV64-NEXT: ret
@@ -502,11 +494,9 @@
 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: vrgather.vv v10, v8, v12
 ; RV32-NEXT: li a0, 98
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: vrgather.vv v10, v8, v12
 ; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t
 ; RV32-NEXT: vmv1r.v v8, v10
 ; RV32-NEXT: ret
@@ -524,11 +514,9 @@
 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT: vrgather.vv v10, v8, v12
 ; RV64-NEXT: li a0, 98
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
 ; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT: vrgather.vv v10, v8, v12
 ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t
 ; RV64-NEXT: vmv1r.v v8, v10
 ; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -4114,22 +4114,16 @@
 ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0)
 ; LMULMAX2-RV32-NEXT: lui a1, 2
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 289
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX2-RV32-NEXT: vmv.v.i v8, 3
-; LMULMAX2-RV32-NEXT: vmerge.vim v12, v8, 2, v0
+; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3
 ; LMULMAX2-RV32-NEXT: lui a1, 4
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0
 ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8
 ; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0
 ; LMULMAX2-RV32-NEXT: li a1, 257
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
 ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0
 ; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI130_0)
 ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI130_0)
@@ -4153,22 +4147,16 @@
 ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0)
 ; LMULMAX2-RV64-NEXT: lui a1, 2
 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 289
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX2-RV64-NEXT: vmv.v.i v8, 3
-; LMULMAX2-RV64-NEXT: vmerge.vim v12, v8, 2, v0
+; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3
 ; LMULMAX2-RV64-NEXT: lui a1, 4
 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV64-NEXT: vmv.s.x v8, a1
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0
 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8
 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0
 ; LMULMAX2-RV64-NEXT: li a1, 257
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
 ; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0
 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI130_0)
 ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI130_0)
@@ -4531,11 +4519,9 @@
 ; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
 ; LMULMAX2-RV32-NEXT: lui a1, 7
 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1687
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
 ; LMULMAX2-RV32-NEXT: lui a1, 5
 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1755
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
 ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
 ; LMULMAX2-RV32-NEXT: lui a1, 1048571
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1755
@@ -4553,11 +4539,9 @@
 ; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
 ; LMULMAX2-RV64-NEXT: lui a1, 7
 ; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
 ; LMULMAX2-RV64-NEXT: lui a1, 5
 ; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
 ; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1
 ; LMULMAX2-RV64-NEXT: lui a1, 1048571
 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -256,9 +256,8 @@
 ; RV64-NEXT: lwu a0, 0(a0)
 ; RV64-NEXT: slli a1, a1, 32
 ; RV64-NEXT: or a0, a1, a0
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
 ; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu
+; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, mu
 ; RV64-NEXT: vslideup.vi v9, v8, 1
 ; RV64-NEXT: .LBB5_4: # %else2
 ; RV64-NEXT: vmv1r.v v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -147,8 +147,7 @@
 define <vscale x 1 x i64> @test7(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: test7:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, mu
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, mu
 ; CHECK-NEXT: vmv.s.x v8, a0
 ; CHECK-NEXT: ret
 entry:
@@ -163,8 +162,7 @@
 define <vscale x 1 x i64> @test8(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: test8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli a1, 6, e64, m1, ta, mu
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu
+; CHECK-NEXT: vsetivli a1, 6, e64, m1, tu, mu
 ; CHECK-NEXT: vmv.s.x v8, a0
 ; CHECK-NEXT: ret
 entry:
@@ -178,7 +176,6 @@
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 9, e64, m1, tu, mu
 ; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu
 ; CHECK-NEXT: vmv.s.x v8, a0
 ; CHECK-NEXT: ret
 entry:
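
Illustrative sketch (not part of the patch; registers and AVL values are hypothetical): vmv.s.x and vfmv.s.f write only element 0 when VL > 0, treat every higher-indexed element as tail, and do nothing when VL = 0, so any prior state whose AVL is provably in the same zero/non-zero case and whose SEW and tail/mask policy match can serve the scalar move unchanged. That is the rewrite the test updates above reflect:

    # before                                # after
    vsetivli zero, 8, e16, m2, ta, mu       vsetivli zero, 8, e16, m2, ta, mu
    vadd.vv  v8, v8, v10                    vadd.vv  v8, v8, v10
    vsetivli zero, 1, e16, mf4, ta, mu      vmv.s.x  v0, a0   # VL = 8 > 0, same SEW
    vmv.s.x  v0, a0                         #   and policy: identical effect
    vsetivli zero, 8, e16, m2, ta, mu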