diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -261,9 +261,10 @@ } // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0. - // As such, the result does not depend on LMUL. - if (isScalarMoveInstr(MI)) + if (isScalarMoveInstr(MI)) { Res.LMUL = false; + Res.SEWLMULRatio = false; + } return Res; } @@ -378,16 +379,6 @@ bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } - bool hasSameSEW(const VSETVLIInfo &Other) const { - assert(isValid() && Other.isValid() && - "Can't compare invalid VSETVLIInfos"); - assert(!isUnknown() && !Other.isUnknown() && - "Can't compare VTYPE in unknown state"); - assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && - "Can't compare when only LMUL/SEW ratio is valid."); - return SEW == Other.SEW; - } - bool hasSameVTYPE(const VSETVLIInfo &Other) const { assert(isValid() && Other.isValid() && "Can't compare invalid VSETVLIInfos"); @@ -418,15 +409,6 @@ return getSEWLMULRatio() == Other.getSEWLMULRatio(); } - bool hasSamePolicy(const VSETVLIInfo &Other) const { - assert(isValid() && Other.isValid() && - "Can't compare invalid VSETVLIInfos"); - assert(!isUnknown() && !Other.isUnknown() && - "Can't compare VTYPE in unknown state"); - return TailAgnostic == Other.TailAgnostic && - MaskAgnostic == Other.MaskAgnostic; - } - bool hasCompatibleVTYPE(const DemandedFields &Used, const VSETVLIInfo &Require) const { return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used); @@ -454,8 +436,7 @@ if (SEW == Require.SEW) return true; - // TODO: Check Used.VL here - if (!hasSameAVL(Require)) + if (Used.VL && !hasSameAVL(Require)) return false; return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used); } @@ -804,23 +785,28 @@ if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly()) return true; - const DemandedFields Used = getDemanded(MI); - if (CurInfo.isCompatible(Used, Require)) - return false; + DemandedFields Used = getDemanded(MI); // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0. - // Additionally, if writing to an implicit_def operand, we don't need to - // preserve any other bits and are thus compatible with any larger etype, - // and can disregard policy bits. if (isScalarMoveInstr(MI) && CurInfo.hasEquallyZeroAVL(Require)) { + Used.VL = false; + // Additionally, if writing to an implicit_def operand, we don't need to + // preserve any other bits and are thus compatible with any larger etype, + // and can disregard policy bits. Warning: It's tempting to try doing + // this for any tail agnostic operation, but we can't as TA requires + // tail lanes to either be the original value or -1. We are writing + // unknown bits to the lanes here. auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg()); if (VRegDef && VRegDef->isImplicitDef() && - CurInfo.getSEW() >= Require.getSEW()) - return false; - if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require)) - return false; + CurInfo.getSEW() >= Require.getSEW()) { + Used.SEW = false; + Used.TailPolicy = false; + } } + if (CurInfo.isCompatible(Used, Require)) + return false; + // We didn't find a compatible value. If our AVL is a virtual register, // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need // and the last VL/VTYPE we observed is the same, we don't need a diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -181,7 +181,6 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 9, e64, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret entry: @@ -227,7 +226,6 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 9, e64, m1, tu, mu ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: