diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -148,10 +148,7 @@
                               Other.MaskAgnostic);
   }
 
-  // Convert VLMUL to a fixed point value with 3 bits of fraction.
-  unsigned getSEWLMULRatio() const {
-    assert(isValid() && !isUnknown() &&
-           "Can't use VTYPE for uninitialized or unknown");
+  static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
     unsigned LMul;
     bool Fractional;
     std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
@@ -163,6 +160,12 @@
     return (SEW * 8) / LMul;
   }
 
+  unsigned getSEWLMULRatio() const {
+    assert(isValid() && !isUnknown() &&
+           "Can't use VTYPE for uninitialized or unknown");
+    return getSEWLMULRatio(SEW, VLMul);
+  }
+
   // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
   bool hasSameVLMAX(const VSETVLIInfo &Other) const {
     assert(isValid() && Other.isValid() &&
@@ -208,6 +211,29 @@
     return hasSameAVL(InstrInfo);
   }
 
+  bool isCompatibleWithLoadStoreEEW(unsigned EEW,
+                                    const VSETVLIInfo &InstrInfo) const {
+    assert(isValid() && InstrInfo.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!InstrInfo.SEWLMULRatioOnly &&
+           "Expected a valid VTYPE for instruction!");
+    assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");
+
+    if (isUnknown() || hasSEWLMULRatioOnly())
+      return false;
+
+    if (!hasSameAVL(InstrInfo))
+      return false;
+
+    // TODO: This check isn't required for stores, but we should skip it for
+    // all stores, not just unit-stride and strided ones, so keep it for now.
+    if (TailAgnostic != InstrInfo.TailAgnostic ||
+        MaskAgnostic != InstrInfo.MaskAgnostic)
+      return false;
+
+    return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
+  }
+
   bool operator==(const VSETVLIInfo &Other) const {
     // Uninitialized is only equal to another Uninitialized.
     if (!isValid())
@@ -515,6 +541,202 @@
   return true;
 }
 
+bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
+                                const VSETVLIInfo &Require,
+                                const VSETVLIInfo &CurInfo) {
+  unsigned EEW;
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case RISCV::PseudoVLE8_V_M1:
+  case RISCV::PseudoVLE8_V_M1_MASK:
+  case RISCV::PseudoVLE8_V_M2:
+  case RISCV::PseudoVLE8_V_M2_MASK:
+  case RISCV::PseudoVLE8_V_M4:
+  case RISCV::PseudoVLE8_V_M4_MASK:
+  case RISCV::PseudoVLE8_V_M8:
+  case RISCV::PseudoVLE8_V_M8_MASK:
+  case RISCV::PseudoVLE8_V_MF2:
+  case RISCV::PseudoVLE8_V_MF2_MASK:
+  case RISCV::PseudoVLE8_V_MF4:
+  case RISCV::PseudoVLE8_V_MF4_MASK:
+  case RISCV::PseudoVLE8_V_MF8:
+  case RISCV::PseudoVLE8_V_MF8_MASK:
+  case RISCV::PseudoVLSE8_V_M1:
+  case RISCV::PseudoVLSE8_V_M1_MASK:
+  case RISCV::PseudoVLSE8_V_M2:
+  case RISCV::PseudoVLSE8_V_M2_MASK:
+  case RISCV::PseudoVLSE8_V_M4:
+  case RISCV::PseudoVLSE8_V_M4_MASK:
+  case RISCV::PseudoVLSE8_V_M8:
+  case RISCV::PseudoVLSE8_V_M8_MASK:
+  case RISCV::PseudoVLSE8_V_MF2:
+  case RISCV::PseudoVLSE8_V_MF2_MASK:
+  case RISCV::PseudoVLSE8_V_MF4:
+  case RISCV::PseudoVLSE8_V_MF4_MASK:
+  case RISCV::PseudoVLSE8_V_MF8:
+  case RISCV::PseudoVLSE8_V_MF8_MASK:
+  case RISCV::PseudoVSE8_V_M1:
+  case RISCV::PseudoVSE8_V_M1_MASK:
+  case RISCV::PseudoVSE8_V_M2:
+  case RISCV::PseudoVSE8_V_M2_MASK:
+  case RISCV::PseudoVSE8_V_M4:
+  case RISCV::PseudoVSE8_V_M4_MASK:
+  case RISCV::PseudoVSE8_V_M8:
+  case RISCV::PseudoVSE8_V_M8_MASK:
+  case RISCV::PseudoVSE8_V_MF2:
+  case RISCV::PseudoVSE8_V_MF2_MASK:
+  case RISCV::PseudoVSE8_V_MF4:
+  case RISCV::PseudoVSE8_V_MF4_MASK:
+  case RISCV::PseudoVSE8_V_MF8:
+  case RISCV::PseudoVSE8_V_MF8_MASK:
+  case RISCV::PseudoVSSE8_V_M1:
+  case RISCV::PseudoVSSE8_V_M1_MASK:
+  case RISCV::PseudoVSSE8_V_M2:
+  case RISCV::PseudoVSSE8_V_M2_MASK:
+  case RISCV::PseudoVSSE8_V_M4:
+  case RISCV::PseudoVSSE8_V_M4_MASK:
+  case RISCV::PseudoVSSE8_V_M8:
+  case RISCV::PseudoVSSE8_V_M8_MASK:
+  case RISCV::PseudoVSSE8_V_MF2:
+  case RISCV::PseudoVSSE8_V_MF2_MASK:
+  case RISCV::PseudoVSSE8_V_MF4:
+  case RISCV::PseudoVSSE8_V_MF4_MASK:
+  case RISCV::PseudoVSSE8_V_MF8:
+  case RISCV::PseudoVSSE8_V_MF8_MASK:
+    EEW = 8;
+    break;
+  case RISCV::PseudoVLE16_V_M1:
+  case RISCV::PseudoVLE16_V_M1_MASK:
+  case RISCV::PseudoVLE16_V_M2:
+  case RISCV::PseudoVLE16_V_M2_MASK:
+  case RISCV::PseudoVLE16_V_M4:
+  case RISCV::PseudoVLE16_V_M4_MASK:
+  case RISCV::PseudoVLE16_V_M8:
+  case RISCV::PseudoVLE16_V_M8_MASK:
+  case RISCV::PseudoVLE16_V_MF2:
+  case RISCV::PseudoVLE16_V_MF2_MASK:
+  case RISCV::PseudoVLE16_V_MF4:
+  case RISCV::PseudoVLE16_V_MF4_MASK:
+  case RISCV::PseudoVLSE16_V_M1:
+  case RISCV::PseudoVLSE16_V_M1_MASK:
+  case RISCV::PseudoVLSE16_V_M2:
+  case RISCV::PseudoVLSE16_V_M2_MASK:
+  case RISCV::PseudoVLSE16_V_M4:
+  case RISCV::PseudoVLSE16_V_M4_MASK:
+  case RISCV::PseudoVLSE16_V_M8:
+  case RISCV::PseudoVLSE16_V_M8_MASK:
+  case RISCV::PseudoVLSE16_V_MF2:
+  case RISCV::PseudoVLSE16_V_MF2_MASK:
+  case RISCV::PseudoVLSE16_V_MF4:
+  case RISCV::PseudoVLSE16_V_MF4_MASK:
+  case RISCV::PseudoVSE16_V_M1:
+  case RISCV::PseudoVSE16_V_M1_MASK:
+  case RISCV::PseudoVSE16_V_M2:
+  case RISCV::PseudoVSE16_V_M2_MASK:
+  case RISCV::PseudoVSE16_V_M4:
+  case RISCV::PseudoVSE16_V_M4_MASK:
+  case RISCV::PseudoVSE16_V_M8:
+  case RISCV::PseudoVSE16_V_M8_MASK:
+  case RISCV::PseudoVSE16_V_MF2:
+  case RISCV::PseudoVSE16_V_MF2_MASK:
+  case RISCV::PseudoVSE16_V_MF4:
+  case RISCV::PseudoVSE16_V_MF4_MASK:
+  case RISCV::PseudoVSSE16_V_M1:
+  case RISCV::PseudoVSSE16_V_M1_MASK:
+  case RISCV::PseudoVSSE16_V_M2:
+  case RISCV::PseudoVSSE16_V_M2_MASK:
+  case RISCV::PseudoVSSE16_V_M4:
+  case RISCV::PseudoVSSE16_V_M4_MASK:
+  case RISCV::PseudoVSSE16_V_M8:
+  case RISCV::PseudoVSSE16_V_M8_MASK:
+  case RISCV::PseudoVSSE16_V_MF2:
+  case RISCV::PseudoVSSE16_V_MF2_MASK:
+  case RISCV::PseudoVSSE16_V_MF4:
+  case RISCV::PseudoVSSE16_V_MF4_MASK:
+    EEW = 16;
+    break;
+  case RISCV::PseudoVLE32_V_M1:
+  case RISCV::PseudoVLE32_V_M1_MASK:
+  case RISCV::PseudoVLE32_V_M2:
+  case RISCV::PseudoVLE32_V_M2_MASK:
+  case RISCV::PseudoVLE32_V_M4:
+  case RISCV::PseudoVLE32_V_M4_MASK:
+  case RISCV::PseudoVLE32_V_M8:
+  case RISCV::PseudoVLE32_V_M8_MASK:
+  case RISCV::PseudoVLE32_V_MF2:
+  case RISCV::PseudoVLE32_V_MF2_MASK:
+  case RISCV::PseudoVLSE32_V_M1:
+  case RISCV::PseudoVLSE32_V_M1_MASK:
+  case RISCV::PseudoVLSE32_V_M2:
+  case RISCV::PseudoVLSE32_V_M2_MASK:
+  case RISCV::PseudoVLSE32_V_M4:
+  case RISCV::PseudoVLSE32_V_M4_MASK:
+  case RISCV::PseudoVLSE32_V_M8:
+  case RISCV::PseudoVLSE32_V_M8_MASK:
+  case RISCV::PseudoVLSE32_V_MF2:
+  case RISCV::PseudoVLSE32_V_MF2_MASK:
+  case RISCV::PseudoVSE32_V_M1:
+  case RISCV::PseudoVSE32_V_M1_MASK:
+  case RISCV::PseudoVSE32_V_M2:
+  case RISCV::PseudoVSE32_V_M2_MASK:
+  case RISCV::PseudoVSE32_V_M4:
+  case RISCV::PseudoVSE32_V_M4_MASK:
+  case RISCV::PseudoVSE32_V_M8:
+  case RISCV::PseudoVSE32_V_M8_MASK:
+  case RISCV::PseudoVSE32_V_MF2:
+  case RISCV::PseudoVSE32_V_MF2_MASK:
+  case RISCV::PseudoVSSE32_V_M1:
+  case RISCV::PseudoVSSE32_V_M1_MASK:
+  case RISCV::PseudoVSSE32_V_M2:
+  case RISCV::PseudoVSSE32_V_M2_MASK:
+  case RISCV::PseudoVSSE32_V_M4:
+  case RISCV::PseudoVSSE32_V_M4_MASK:
+  case RISCV::PseudoVSSE32_V_M8:
+  case RISCV::PseudoVSSE32_V_M8_MASK:
+  case RISCV::PseudoVSSE32_V_MF2:
+  case RISCV::PseudoVSSE32_V_MF2_MASK:
+    EEW = 32;
+    break;
+  case RISCV::PseudoVLE64_V_M1:
+  case RISCV::PseudoVLE64_V_M1_MASK:
+  case RISCV::PseudoVLE64_V_M2:
+  case RISCV::PseudoVLE64_V_M2_MASK:
+  case RISCV::PseudoVLE64_V_M4:
+  case RISCV::PseudoVLE64_V_M4_MASK:
+  case RISCV::PseudoVLE64_V_M8:
+  case RISCV::PseudoVLE64_V_M8_MASK:
+  case RISCV::PseudoVLSE64_V_M1:
+  case RISCV::PseudoVLSE64_V_M1_MASK:
+  case RISCV::PseudoVLSE64_V_M2:
+  case RISCV::PseudoVLSE64_V_M2_MASK:
+  case RISCV::PseudoVLSE64_V_M4:
+  case RISCV::PseudoVLSE64_V_M4_MASK:
+  case RISCV::PseudoVLSE64_V_M8:
+  case RISCV::PseudoVLSE64_V_M8_MASK:
+  case RISCV::PseudoVSE64_V_M1:
+  case RISCV::PseudoVSE64_V_M1_MASK:
+  case RISCV::PseudoVSE64_V_M2:
+  case RISCV::PseudoVSE64_V_M2_MASK:
+  case RISCV::PseudoVSE64_V_M4:
+  case RISCV::PseudoVSE64_V_M4_MASK:
+  case RISCV::PseudoVSE64_V_M8:
+  case RISCV::PseudoVSE64_V_M8_MASK:
+  case RISCV::PseudoVSSE64_V_M1:
+  case RISCV::PseudoVSSE64_V_M1_MASK:
+  case RISCV::PseudoVSSE64_V_M2:
+  case RISCV::PseudoVSSE64_V_M2_MASK:
+  case RISCV::PseudoVSSE64_V_M4:
+  case RISCV::PseudoVSSE64_V_M4_MASK:
+  case RISCV::PseudoVSSE64_V_M8:
+  case RISCV::PseudoVSSE64_V_M8_MASK:
+    EEW = 64;
+    break;
+  }
+
+  return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
+}
+
 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
   bool HadVectorOp = false;
 
@@ -539,7 +761,13 @@
     } else {
       // If this instruction isn't compatible with the previous VL/VTYPE
       // we need to insert a VSETVLI.
-      if (needVSETVLI(NewInfo, BBInfo.Change))
+      // If this is a unit-stride or strided load/store, we may be able to use
+      // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+      // NOTE: We only do this if the vtype we're comparing against was
+      // created in this block. We need the first and third phase to treat
+      // the store the same way.
+      if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
+          needVSETVLI(NewInfo, BBInfo.Change))
         BBInfo.Change = NewInfo;
     }
   }
@@ -692,7 +920,13 @@
     } else {
       // If this instruction isn't compatible with the previous VL/VTYPE
       // we need to insert a VSETVLI.
-      if (needVSETVLI(NewInfo, CurInfo)) {
+      // If this is a unit-stride or strided load/store, we may be able to use
+      // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+      // NOTE: We can't use predecessor information for the store. We must
+      // treat it the same as the first phase so that we produce the correct
+      // vl/vtype for successor blocks.
+      if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
+          needVSETVLI(NewInfo, CurInfo)) {
         // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it
         // with current VL/VTYPE.
         bool NeedInsertVSETVLI = true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -10,7 +10,6 @@
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vle16.v v25, (a0)
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vse32.v v26, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
@@ -27,7 +26,6 @@
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwcvt.f.f.v v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
@@ -42,7 +40,6 @@
 ; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; LMULMAX8-NEXT:    vle16.v v25, (a0)
 ; LMULMAX8-NEXT:    vfwcvt.f.f.v v26, v25
-; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vse32.v v26, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -56,7 +53,6 @@
 ; LMULMAX1-NEXT:    vfwcvt.f.f.v v27, v26
 ; LMULMAX1-NEXT:    vfwcvt.f.f.v v26, v25
 ; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; LMULMAX1-NEXT:    vse32.v v27, (a0)
 ; LMULMAX1-NEXT:    vse32.v v26, (a1)
 ; LMULMAX1-NEXT:    ret
@@ -74,7 +70,6 @@
 ; LMULMAX8-NEXT:    vfwcvt.f.f.v v26, v25
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vfwcvt.f.f.v v28, v26
-; LMULMAX8-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; LMULMAX8-NEXT:    vse64.v v28, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -105,7 +100,6 @@
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfwcvt.f.f.v v25, v29
 ; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; LMULMAX1-NEXT:    vse64.v v27, (a0)
 ; LMULMAX1-NEXT:    vse64.v v25, (a1)
 ; LMULMAX1-NEXT:    addi a0, a1, 48
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -172,12 +172,11 @@
 ; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32-NEXT:    vmv.s.x v0, a0
 ; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT:    vid.v v25
-; RV32-NEXT:    vrsub.vi v25, v25, 4
 ; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_0)
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v26, (a0), zero
+; RV32-NEXT:    vid.v v25
+; RV32-NEXT:    vrsub.vi v25, v25, 4
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
 ; RV32-NEXT:    vrgatherei16.vv v26, v8, v25, v0.t
 ; RV32-NEXT:    vmv2r.v v8, v26
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -437,12 +437,10 @@
 ; CHECK-LABEL: copysign_neg_trunc_v4f16_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vle16.v v25, (a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v26, (a1)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v27, v26
-; CHECK-NEXT:    vfsgnjn.vv v25, v25, v27
+; CHECK-NEXT:    vle32.v v25, (a1)
+; CHECK-NEXT:    vle16.v v26, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v27, v25
+; CHECK-NEXT:    vfsgnjn.vv v25, v26, v27
 ; CHECK-NEXT:    vse16.v v25, (a0)
 ; CHECK-NEXT:    ret
   %a = load <4 x half>, <4 x half>* %x
@@ -459,12 +457,12 @@
 ; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vle32.v v25, (a1)
+; CHECK-NEXT:    vle64.v v26, (a0)
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vle32.v v26, (a1)
-; CHECK-NEXT:    vfwcvt.f.f.v v27, v26
+; CHECK-NEXT:    vfwcvt.f.f.v v27, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v25, v25, v27
+; CHECK-NEXT:    vfsgnjn.vv v25, v26, v27
 ; CHECK-NEXT:    vse64.v v25, (a0)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -200,7 +200,6 @@
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT:    vle32.v v25, (a0)
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v26, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, <2 x float>* %x
@@ -215,7 +214,6 @@
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT:    vle32.v v25, (a0)
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v26, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, <2 x float>* %x
@@ -230,7 +228,6 @@
 ; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vle32.v v26, (a0)
 ; LMULMAX8-NEXT:    vfwcvt.rtz.x.f.v v28, v26
-; LMULMAX8-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; LMULMAX8-NEXT:    vse64.v v28, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -251,7 +248,6 @@
 ; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v27, v25
 ; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v25, v26
 ; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; LMULMAX1-NEXT:    vse64.v v29, (a0)
 ; LMULMAX1-NEXT:    vse64.v v25, (a1)
 ; LMULMAX1-NEXT:    addi a0, a1, 48
@@ -271,7 +267,6 @@
 ; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vle32.v v26, (a0)
 ; LMULMAX8-NEXT:    vfwcvt.rtz.xu.f.v v28, v26
-; LMULMAX8-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; LMULMAX8-NEXT:    vse64.v v28, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -292,7 +287,6 @@
 ; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v27, v25
 ; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v25, v26
 ; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; LMULMAX1-NEXT:    vse64.v v29, (a0)
 ; LMULMAX1-NEXT:    vse64.v v25, (a1)
 ; LMULMAX1-NEXT:    addi a0, a1, 48
@@ -314,7 +308,6 @@
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
@@ -331,7 +324,6 @@
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -376,8 +376,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
 ; CHECK-NEXT:    vle8.v v25, (a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v26, (a1)
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vsext.vf2 v27, v25
 ; CHECK-NEXT:    vwmul.vv v8, v27, v26
 ; CHECK-NEXT:    ret
@@ -393,12 +393,10 @@
 ; CHECK-LABEL: vwmul_v4i64_v4i32_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v25, (a0)
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vle8.v v26, (a1)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vsext.vf4 v27, v26
-; CHECK-NEXT:    vwmul.vv v8, v25, v27
+; CHECK-NEXT:    vle8.v v25, (a1)
+; CHECK-NEXT:    vle32.v v26, (a0)
+; CHECK-NEXT:    vsext.vf4 v27, v25
+; CHECK-NEXT:    vwmul.vv v8, v26, v27
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
   %b = load <4 x i8>, <4 x i8>* %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
@@ -376,8 +376,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
 ; CHECK-NEXT:    vle8.v v25, (a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v26, (a1)
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vzext.vf2 v27, v25
 ; CHECK-NEXT:    vwmulu.vv v8, v27, v26
 ; CHECK-NEXT:    ret
@@ -393,12 +393,10 @@
 ; CHECK-LABEL: vwmulu_v4i64_v4i32_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v25, (a0)
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vle8.v v26, (a1)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vzext.vf4 v27, v26
-; CHECK-NEXT:    vwmulu.vv v8, v25, v27
+; CHECK-NEXT:    vle8.v v25, (a1)
+; CHECK-NEXT:    vle32.v v26, (a0)
+; CHECK-NEXT:    vzext.vf4 v27, v25
+; CHECK-NEXT:    vwmulu.vv v8, v26, v27
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
   %b = load <4 x i8>, <4 x i8>* %y
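
Note for reviewers: the compatibility test added above comes down to comparing SEW/LMUL ratios. VLMAX = VLEN * LMUL / SEW, and a unit-stride or strided memory op with element width EEW executes under EMUL = (EEW/SEW) * LMUL, so any currently programmed vtype whose SEW/LMUL ratio equals the op's EEW/EMUL ratio yields the same VLMAX and the extra vsetvli can be dropped. The standalone C++ sketch below only illustrates that ratio comparison; the names VTypeLike, sewLMULRatio and sameVLMAX are invented for the example and are not part of the pass or the RISC-V backend.

// Minimal illustrative sketch (assumptions, not LLVM code).
#include <cassert>
#include <cstdio>

struct VTypeLike {
  unsigned SEW;         // element width in bits: 8, 16, 32 or 64
  unsigned LMulEighths; // LMUL in eighths: mf8 = 1, mf2 = 4, m1 = 8, m8 = 64
};

// SEW/LMUL with LMUL held in eighths (three fraction bits), mirroring the
// idea behind getSEWLMULRatio() in the patch: (SEW * 8) / (LMUL * 8).
unsigned sewLMULRatio(const VTypeLike &VT) {
  assert(VT.LMulEighths != 0 && "LMUL must be non-zero");
  return (VT.SEW * 8) / VT.LMulEighths;
}

// A load/store with element width EEW and register group EMUL sees the same
// VLMAX as the current vtype exactly when the two ratios match. (The pass
// additionally checks the AVL and, for now, the tail/mask policy bits.)
bool sameVLMAX(const VTypeLike &Cur, unsigned EEW, unsigned EMulEighths) {
  return sewLMULRatio(Cur) == sewLMULRatio({EEW, EMulEighths});
}

int main() {
  VTypeLike Cur{16, 4}; // e16, mf2 -> ratio 32
  // A vse32.v with EMUL=m1 also has ratio 32, so its vsetvli can be skipped.
  std::printf("e32/m1 compatible: %d\n", sameVLMAX(Cur, 32, 8));  // prints 1
  // A vse32.v with EMUL=m2 has ratio 16, i.e. a different VLMAX, so it cannot.
  std::printf("e32/m2 compatible: %d\n", sameVLMAX(Cur, 32, 16)); // prints 0
}

With vtype e16,mf2 in effect, the e32/m1 case is exactly the pattern the new canSkipVSETVLIForLoadStore hook lets through in the test diffs above (e.g. the vse32.v after a vfwcvt widening from e16,mf4).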