diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3301,17 +3301,20 @@ True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; SDValue TrueVL = True.getOperand(TrueVLIndex); - auto IsNoFPExcept = [this](SDValue N) { - return !this->mayRaiseFPException(N.getNode()) || - N->getFlags().hasNoFPExcept(); - }; - - // Allow the peephole for non-exception True with VLMAX vector length, since - // all the values after VL of N are dependent on Merge. VLMAX should be - // lowered to (XLenVT -1). - if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL))) + // We need the VLs to be the same. But if True has a VL of VLMAX then we can + // go ahead and use N's VL because we know it will be smaller, so any tail + // elements in the result will be from Merge. + if (TrueVL != VL && !isAllOnesConstant(TrueVL)) return false; + // If we end up changing the VL or mask of True, then we need to make sure it + // doesn't raise any observable fp exceptions, since changing the active + // elements will affect how fflags is set. + if (TrueVL != VL || !IsMasked) + if (mayRaiseFPException(True.getNode()) && + !True->getFlags().hasNoFPExcept()) + return false; + SDLoc DL(N); unsigned MaskedOpc = Info->MaskedPseudo; #ifndef NDEBUG diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -378,13 +378,15 @@ declare @llvm.experimental.constrained.fadd(, , metadata, metadata) declare @llvm.riscv.vmerge.nxv2f32.nxv2f32(, , , , i64) -; FIXME: This shouldn't be folded because we need to preserve exceptions with +; This shouldn't be folded because we need to preserve exceptions with ; "fpexcept.strict" exception behaviour, and masking may hide them. define @vpmerge_constrained_fadd_vlmax( %passthru, %x, %y, %m) strictfp { ; CHECK-LABEL: vpmerge_constrained_fadd_vlmax: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, mu -; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.fadd( %x, %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %m, i64 -1)