diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -148,10 +148,7 @@
                     Other.MaskAgnostic);
   }
 
-  // Convert VLMUL to a fixed point value with 3 bits of fraction.
-  unsigned getSEWLMULRatio() const {
-    assert(isValid() && !isUnknown() &&
-           "Can't use VTYPE for uninitialized or unknown");
+  static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
     unsigned LMul;
     bool Fractional;
     std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
@@ -163,6 +160,12 @@
     return (SEW * 8) / LMul;
   }
 
+  unsigned getSEWLMULRatio() const {
+    assert(isValid() && !isUnknown() &&
+           "Can't use VTYPE for uninitialized or unknown");
+    return getSEWLMULRatio(SEW, VLMul);
+  }
+
   // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
   bool hasSameVLMAX(const VSETVLIInfo &Other) const {
     assert(isValid() && Other.isValid() &&
@@ -208,6 +211,25 @@
     return hasSameAVL(InstrInfo);
   }
 
+  bool isCompatibleWithStoreEEW(unsigned EEW, const VSETVLIInfo &InstrInfo) const {
+    assert(isValid() && InstrInfo.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!InstrInfo.SEWLMULRatioOnly &&
+           "Expected a valid VTYPE for instruction!");
+    assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");
+
+    if (isUnknown() || hasSEWLMULRatioOnly())
+      return false;
+
+    // TODO: This check isn't required for stores, but we should ignore it for
+    // all stores, not just unit-stride ones, so leaving it for now.
+    if (TailAgnostic != InstrInfo.TailAgnostic ||
+        MaskAgnostic != InstrInfo.MaskAgnostic)
+      return false;
+
+    return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
+  }
+
   bool operator==(const VSETVLIInfo &Other) const {
     // Uninitialized is only equal to another Uninitialized.
     if (!isValid())
@@ -492,6 +514,70 @@
   return true;
 }
 
+bool canSkipVSETVLIForStore(const MachineInstr &MI,
+                            const VSETVLIInfo &Require,
+                            const VSETVLIInfo &CurInfo) {
+  unsigned EEW;
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case RISCV::PseudoVSE8_V_M1:
+  case RISCV::PseudoVSE8_V_M1_MASK:
+  case RISCV::PseudoVSE8_V_M2:
+  case RISCV::PseudoVSE8_V_M2_MASK:
+  case RISCV::PseudoVSE8_V_M4:
+  case RISCV::PseudoVSE8_V_M4_MASK:
+  case RISCV::PseudoVSE8_V_M8:
+  case RISCV::PseudoVSE8_V_M8_MASK:
+  case RISCV::PseudoVSE8_V_MF2:
+  case RISCV::PseudoVSE8_V_MF2_MASK:
+  case RISCV::PseudoVSE8_V_MF4:
+  case RISCV::PseudoVSE8_V_MF4_MASK:
+  case RISCV::PseudoVSE8_V_MF8:
+  case RISCV::PseudoVSE8_V_MF8_MASK:
+    EEW = 8;
+    break;
+  case RISCV::PseudoVSE16_V_M1:
+  case RISCV::PseudoVSE16_V_M1_MASK:
+  case RISCV::PseudoVSE16_V_M2:
+  case RISCV::PseudoVSE16_V_M2_MASK:
+  case RISCV::PseudoVSE16_V_M4:
+  case RISCV::PseudoVSE16_V_M4_MASK:
+  case RISCV::PseudoVSE16_V_M8:
+  case RISCV::PseudoVSE16_V_M8_MASK:
+  case RISCV::PseudoVSE16_V_MF2:
+  case RISCV::PseudoVSE16_V_MF2_MASK:
+  case RISCV::PseudoVSE16_V_MF4:
+  case RISCV::PseudoVSE16_V_MF4_MASK:
+    EEW = 16;
+    break;
+  case RISCV::PseudoVSE32_V_M1:
+  case RISCV::PseudoVSE32_V_M1_MASK:
+  case RISCV::PseudoVSE32_V_M2:
+  case RISCV::PseudoVSE32_V_M2_MASK:
+  case RISCV::PseudoVSE32_V_M4:
+  case RISCV::PseudoVSE32_V_M4_MASK:
+  case RISCV::PseudoVSE32_V_M8:
+  case RISCV::PseudoVSE32_V_M8_MASK:
+  case RISCV::PseudoVSE32_V_MF2:
+  case RISCV::PseudoVSE32_V_MF2_MASK:
+    EEW = 32;
+    break;
+  case RISCV::PseudoVSE64_V_M1:
+  case RISCV::PseudoVSE64_V_M1_MASK:
+  case RISCV::PseudoVSE64_V_M2:
+  case RISCV::PseudoVSE64_V_M2_MASK:
+  case RISCV::PseudoVSE64_V_M4:
+  case RISCV::PseudoVSE64_V_M4_MASK:
+  case RISCV::PseudoVSE64_V_M8:
+  case RISCV::PseudoVSE64_V_M8_MASK:
+    EEW = 64;
+    break;
+  }
+
+  return CurInfo.isCompatibleWithStoreEEW(EEW, Require);
+}
+
 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
   bool HadVectorOp = false;
 
@@ -516,7 +602,13 @@
       } else {
         // If this instruction isn't compatible with the previous VL/VTYPE
         // we need to insert a VSETVLI.
-        if (needVSETVLI(NewInfo, BBInfo.Change))
+        // If this is a unit-stride store, we may be able to use the
+        // EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+        // NOTE: We only do this if the vtype we're comparing against was
+        // created in this block. We need the first and third phase to treat
+        // the store the same way.
+        if (!canSkipVSETVLIForStore(MI, NewInfo, BBInfo.Change) &&
+            needVSETVLI(NewInfo, BBInfo.Change))
           BBInfo.Change = NewInfo;
       }
     }
@@ -666,7 +758,13 @@
       } else {
         // If this instruction isn't compatible with the previous VL/VTYPE
         // we need to insert a VSETVLI.
-        if (needVSETVLI(NewInfo, CurInfo)) {
+        // If this is a unit-stride store, we may be able to use the
+        // EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+        // NOTE: We can't use predecessor information for the store. We must
+        // treat it the same as the first phase so that we produce the correct
+        // vl/vtype for successor blocks.
+        if (!canSkipVSETVLIForStore(MI, NewInfo, CurInfo) &&
+            needVSETVLI(NewInfo, CurInfo)) {
           insertVSETVLI(MBB, MI, NewInfo, CurInfo);
           CurInfo = NewInfo;
         }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -10,7 +10,6 @@
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vle16.v v25, (a0)
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vse32.v v26, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
@@ -27,7 +26,6 @@
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwcvt.f.f.v v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
@@ -42,7 +40,6 @@
 ; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; LMULMAX8-NEXT:    vle16.v v25, (a0)
 ; LMULMAX8-NEXT:    vfwcvt.f.f.v v26, v25
-; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vse32.v v26, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -56,7 +53,6 @@
 ; LMULMAX1-NEXT:    vfwcvt.f.f.v v27, v26
 ; LMULMAX1-NEXT:    vfwcvt.f.f.v v26, v25
 ; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; LMULMAX1-NEXT:    vse32.v v27, (a0)
 ; LMULMAX1-NEXT:    vse32.v v26, (a1)
 ; LMULMAX1-NEXT:    ret
@@ -74,7 +70,6 @@
 ; LMULMAX8-NEXT:    vfwcvt.f.f.v v26, v25
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vfwcvt.f.f.v v28, v26
-; LMULMAX8-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; LMULMAX8-NEXT:    vse64.v v28, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -105,7 +100,6 @@
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfwcvt.f.f.v v25, v29
 ; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; LMULMAX1-NEXT:    vse64.v v27, (a0)
 ; LMULMAX1-NEXT:    vse64.v v25, (a1)
 ; LMULMAX1-NEXT:    addi a0, a1, 48
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -200,7 +200,6 @@
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT:    vle32.v v25, (a0)
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v26, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, <2 x float>* %x
@@ -215,7 +214,6 @@
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT:    vle32.v v25, (a0)
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v26, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, <2 x float>* %x
@@ -230,7 +228,6 @@
 ; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vle32.v v26, (a0)
 ; LMULMAX8-NEXT:    vfwcvt.rtz.x.f.v v28, v26
-; LMULMAX8-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; LMULMAX8-NEXT:    vse64.v v28, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -251,7 +248,6 @@
 ; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v27, v25
 ; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v25, v26
 ; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; LMULMAX1-NEXT:    vse64.v v29, (a0)
 ; LMULMAX1-NEXT:    vse64.v v25, (a1)
 ; LMULMAX1-NEXT:    addi a0, a1, 48
@@ -271,7 +267,6 @@
 ; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vle32.v v26, (a0)
 ; LMULMAX8-NEXT:    vfwcvt.rtz.xu.f.v v28, v26
-; LMULMAX8-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; LMULMAX8-NEXT:    vse64.v v28, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -292,7 +287,6 @@
 ; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v27, v25
 ; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v25, v26
 ; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; LMULMAX1-NEXT:    vse64.v v29, (a0)
 ; LMULMAX1-NEXT:    vse64.v v25, (a1)
 ; LMULMAX1-NEXT:    addi a0, a1, 48
@@ -314,7 +308,6 @@
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
@@ -331,7 +324,6 @@
 ; CHECK-NEXT:    vfwcvt.f.f.v v26, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vse64.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, <2 x half>* %x
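
For reference, a minimal standalone sketch (not part of the patch; the helper name below is hypothetical) of the ratio equality that isCompatibleWithStoreEEW relies on: a unit-stride store of element width EEW uses EMUL = (EEW/SEW)*LMUL, so its SEW/LMUL ratio, and therefore VLMAX, matches the current vtype, which is why the vsetvli instructions above can be dropped. The fixed-point encoding with 3 fractional bits mirrors getSEWLMULRatio in the patch.

#include <cassert>
#include <cstdio>

// Ratio = SEW / LMUL, with LMUL held as a fixed-point value using 3
// fractional bits (so MF8..M8 map to 1..64), mirroring getSEWLMULRatio().
static unsigned sewLMulRatio(unsigned SEW, unsigned LMul, bool Fractional) {
  unsigned FixedLMul = Fractional ? (8 / LMul) : (8 * LMul);
  return (SEW * 8) / FixedLMul;
}

int main() {
  // fixed-vectors-fp-conv.ll: vle16.v at e16/m1 feeds vfwcvt.f.f.v, then a
  // unit-stride e32 store. EMUL = (32/16) * m1 = m2, so the ratio matches
  // and the vsetvli before vse32.v is unnecessary.
  assert(sewLMulRatio(16, 1, false) == sewLMulRatio(32, 2, false));

  // Same idea with fractional LMUL: e16/mf4 widened to e32 and stored with
  // EMUL = mf2.
  assert(sewLMulRatio(16, 4, true) == sewLMulRatio(32, 2, true));

  std::printf("SEW/LMUL ratios: %u and %u\n",
              sewLMulRatio(32, 2, false), sewLMulRatio(32, 2, true));
  return 0;
}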