diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3317,16 +3317,39 @@
     Glue = True->getOperand(True->getNumOperands() - 1);
   }

-  // We need the VLs to be the same. But if True has a VL of VLMAX then we can
-  // go ahead and use N's VL because we know it will be smaller, so any tail
-  // elements in the result will be from Merge.
-  if (TrueVL != VL && !isAllOnesConstant(TrueVL))
+  auto GetSmallerOrEqualVL = [](SDValue LHS, SDValue RHS) {
+    if (LHS == RHS)
+      return LHS;
+    if (isAllOnesConstant(LHS))
+      return RHS;
+    if (isAllOnesConstant(RHS))
+      return LHS;
+    if (auto *CLHS = dyn_cast<ConstantSDNode>(LHS))
+      if (auto *CRHS = dyn_cast<ConstantSDNode>(RHS))
+        return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
+    return SDValue();
+  };
+
+  // Because N and True must have the same merge operand, the "effective" body
+  // is the minimum of their VLs. For example, if we have VL=3 and VL=5:
+  //
+  // |o o o|x x x x x| <- %x = PseudoVADD %merge, %a, %b, VL=3
+  // |o o o x x|x x x| <- %y = PseudoVMERGE %merge, %merge, %x, %mask, VL=5
+  // -----------------
+  // |o o o|x x x x x| <~ %y = PseudoVADD_MASK %merge, %a, %b, %mask, VL=3
+  //
+  // Then the result contains only the first 3 elements of True.
+  //
+  // So even if the VLs don't match, if we know that one of them is smaller than
+  // the other then we can go ahead and use it.
+  SDValue NewVL = GetSmallerOrEqualVL(TrueVL, VL);
+  if (!NewVL)
     return false;

   // If we end up changing the VL or mask of True, then we need to make sure it
-  // doesn't raise any observable fp exceptions, since changing the active
-  // elements will affect how fflags is set.
-  if (TrueVL != VL || !IsMasked)
+  // doesn't raise any fp exceptions, since changing the active elements will
+  // affect how fflags is set.
+  if (TrueVL != NewVL || !IsMasked)
     if (mayRaiseFPException(True.getNode()) &&
         !True->getFlags().hasNoFPExcept())
       return false;
@@ -3359,7 +3382,7 @@
   if (HasRoundModeOp)
     Ops.push_back(True.getOperand(TrueRoundModeIdx));

-  Ops.append({VL, TrueSEW, PolicyOp});
+  Ops.append({NewVL, TrueSEW, PolicyOp});

   // True might have a chain, e.g. if it's a load
   if (HasChainOp)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -7,31 +7,29 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vle32.v v10, (a1)
-; RV32-NEXT:    andi a0, a2, 1
+; RV32-NEXT:    vle32.v v8, (a1)
+; RV32-NEXT:    andi a1, a2, 1
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v12, v8, a0
-; RV32-NEXT:    slli a0, a2, 30
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 29
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 28
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 27
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    slli a1, a2, 30
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 28
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 27
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v12, v12, a2
-; RV32-NEXT:    vslidedown.vi v12, v12, 2
-; RV32-NEXT:    vand.vi v12, v12, 1
-; RV32-NEXT:    vmsne.vi v0, v12, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
+; RV32-NEXT:    vand.vi v10, v10, 1
+; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0), v0.t
 ; RV32-NEXT:    vse32.v v8, (a3)
 ; RV32-NEXT:    ret
 ;
@@ -39,31 +37,29 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vle32.v v10, (a1)
-; RV64-NEXT:    andi a0, a2, 1
+; RV64-NEXT:    vle32.v v8, (a1)
+; RV64-NEXT:    andi a1, a2, 1
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v12, v8, a0
-; RV64-NEXT:    slli a0, a2, 62
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 61
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 60
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 59
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vslide1down.vx v10, v8, a1
+; RV64-NEXT:    slli a1, a2, 62
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 60
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 59
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v12, v12, a2
-; RV64-NEXT:    vslidedown.vi v12, v12, 2
-; RV64-NEXT:    vand.vi v12, v12, 1
-; RV64-NEXT:    vmsne.vi v0, v12, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a2
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
+; RV64-NEXT:    vand.vi v10, v10, 1
+; RV64-NEXT:    vmsne.vi v0, v10, 0
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0), v0.t
 ; RV64-NEXT:    vse32.v v8, (a3)
 ; RV64-NEXT:    ret
   %va = load <6 x i32>, ptr %a
@@ -222,31 +218,29 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vle32.v v10, (a1)
-; RV32-NEXT:    andi a0, a2, 1
+; RV32-NEXT:    vle32.v v8, (a1)
+; RV32-NEXT:    andi a1, a2, 1
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v12, v8, a0
-; RV32-NEXT:    slli a0, a2, 30
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 29
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 28
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 27
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    slli a1, a2, 30
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 28
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 27
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v12, v12, a2
-; RV32-NEXT:    vslidedown.vi v12, v12, 2
-; RV32-NEXT:    vand.vi v12, v12, 1
-; RV32-NEXT:    vmsne.vi v0, v12, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
+; RV32-NEXT:    vand.vi v10, v10, 1
+; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0), v0.t
 ; RV32-NEXT:    vse32.v v8, (a3)
 ; RV32-NEXT:    ret
 ;
@@ -254,31 +248,29 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vle32.v v10, (a1)
-; RV64-NEXT:    andi a0, a2, 1
+; RV64-NEXT:    vle32.v v8, (a1)
+; RV64-NEXT:    andi a1, a2, 1
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v12, v8, a0
-; RV64-NEXT:    slli a0, a2, 62
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 61
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 60
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 59
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vslide1down.vx v10, v8, a1
+; RV64-NEXT:    slli a1, a2, 62
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 60
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 59
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v12, v12, a2
-; RV64-NEXT:    vslidedown.vi v12, v12, 2
-; RV64-NEXT:    vand.vi v12, v12, 1
-; RV64-NEXT:    vmsne.vi v0, v12, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a2
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
+; RV64-NEXT:    vand.vi v10, v10, 1
+; RV64-NEXT:    vmsne.vi v0, v10, 0
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0), v0.t
 ; RV64-NEXT:    vse32.v v8, (a3)
 ; RV64-NEXT:    ret
   %va = load <6 x float>, ptr %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
@@ -155,11 +155,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
@@ -173,10 +170,7 @@
 ; CHECK-LABEL: vmerge_larger_vl_same_passthru:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
@@ -223,10 +217,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
@@ -239,10 +231,8 @@
 define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -975,11 +975,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
@@ -990,11 +987,8 @@
 define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_larger_vl_same_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
@@ -1035,10 +1029,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
@@ -1049,10 +1041,8 @@
 define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
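
For reference, the VL-selection rule that the patch's GetSmallerOrEqualVL lambda applies can be sketched in isolation. The minimal standalone C++ model below is an illustration only, not the LLVM API: VLKind, VL, and getSmallerOrEqualVL are invented names, SDValue identity is approximated by plain value equality, and an all-ones VL operand is modelled explicitly as VLMAX. Only the ordering logic (equal, VLMAX is largest, two constants compare by value, otherwise incomparable) mirrors the patch.

// Minimal standalone model of the smaller-or-equal VL selection rule.
// A VL is either a known constant, VLMAX (all-ones), or an unknown runtime
// value; equality here stands in for "the same SDValue" in the real code.
#include <cassert>
#include <cstdint>
#include <optional>

enum class VLKind { Constant, VLMAX, Unknown };

struct VL {
  VLKind Kind;
  uint64_t Value = 0; // Only meaningful when Kind == Constant.
  bool operator==(const VL &O) const {
    return Kind == O.Kind && (Kind != VLKind::Constant || Value == O.Value);
  }
};

// Returns whichever operand is known to be <= the other, or nullopt when the
// two VLs cannot be ordered (e.g. two different unknown runtime values).
std::optional<VL> getSmallerOrEqualVL(const VL &LHS, const VL &RHS) {
  if (LHS == RHS)
    return LHS;
  if (LHS.Kind == VLKind::VLMAX) // VLMAX is >= any other VL.
    return RHS;
  if (RHS.Kind == VLKind::VLMAX)
    return LHS;
  if (LHS.Kind == VLKind::Constant && RHS.Kind == VLKind::Constant)
    return LHS.Value <= RHS.Value ? LHS : RHS;
  return std::nullopt;
}

int main() {
  VL Three{VLKind::Constant, 3}, Five{VLKind::Constant, 5};
  assert(getSmallerOrEqualVL(Three, Five)->Value == 3);             // min(3, 5)
  assert(getSmallerOrEqualVL(VL{VLKind::VLMAX}, Five)->Value == 5); // VLMAX vs 5
  assert(!getSmallerOrEqualVL(VL{VLKind::Unknown}, Five));          // Not comparable.
  return 0;
}

In the patch itself, the "not comparable" case corresponds to GetSmallerOrEqualVL returning an empty SDValue, which makes performCombineVMergeAndVOps bail out rather than fold the vmerge.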