diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3328,10 +3328,24 @@
       True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
   SDValue TrueVL = True.getOperand(TrueVLIndex);
 
-  // We need the VLs to be the same. But if True has a VL of VLMAX then we can
-  // go ahead and use N's VL because we know it will be smaller, so any tail
-  // elements in the result will be from Merge.
-  if (TrueVL != VL && !isAllOnesConstant(TrueVL))
+  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
+    if (LHS == RHS)
+      return LHS;
+    if (isAllOnesConstant(LHS))
+      return RHS;
+    if (isAllOnesConstant(RHS))
+      return LHS;
+    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
+    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
+    if (!CLHS || !CRHS)
+      return SDValue();
+    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
+  };
+
+  // Because N and True must have the same merge operand (or True's operand is
+  // implicit_def), the "effective" body is the minimum of their VLs.
+  VL = GetMinVL(TrueVL, VL);
+  if (!VL)
     return false;
 
   // If we end up changing the VL or mask of True, then we need to make sure it
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -7,31 +7,29 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vle32.v v10, (a1)
-; RV32-NEXT:    andi a0, a2, 1
+; RV32-NEXT:    vle32.v v8, (a1)
+; RV32-NEXT:    andi a1, a2, 1
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v12, v8, a0
-; RV32-NEXT:    slli a0, a2, 30
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 29
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 28
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 27
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    slli a1, a2, 30
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 28
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 27
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v12, v12, a2
-; RV32-NEXT:    vslidedown.vi v12, v12, 2
-; RV32-NEXT:    vand.vi v12, v12, 1
-; RV32-NEXT:    vmsne.vi v0, v12, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
+; RV32-NEXT:    vand.vi v10, v10, 1
+; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0), v0.t
 ; RV32-NEXT:    vse32.v v8, (a3)
 ; RV32-NEXT:    ret
 ;
@@ -39,31 +37,29 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vle32.v v10, (a1)
-; RV64-NEXT:    andi a0, a2, 1
+; RV64-NEXT:    vle32.v v8, (a1)
+; RV64-NEXT:    andi a1, a2, 1
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v12, v8, a0
-; RV64-NEXT:    slli a0, a2, 62
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 61
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 60
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 59
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vslide1down.vx v10, v8, a1
+; RV64-NEXT:    slli a1, a2, 62
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 60
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 59
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v12, v12, a2
-; RV64-NEXT:    vslidedown.vi v12, v12, 2
-; RV64-NEXT:    vand.vi v12, v12, 1
-; RV64-NEXT:    vmsne.vi v0, v12, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a2
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
+; RV64-NEXT:    vand.vi v10, v10, 1
+; RV64-NEXT:    vmsne.vi v0, v10, 0
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0), v0.t
 ; RV64-NEXT:    vse32.v v8, (a3)
 ; RV64-NEXT:    ret
   %va = load <6 x i32>, ptr %a
@@ -222,31 +218,29 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vle32.v v10, (a1)
-; RV32-NEXT:    andi a0, a2, 1
+; RV32-NEXT:    vle32.v v8, (a1)
+; RV32-NEXT:    andi a1, a2, 1
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v12, v8, a0
-; RV32-NEXT:    slli a0, a2, 30
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 29
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 28
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
-; RV32-NEXT:    slli a0, a2, 27
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    slli a1, a2, 30
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 29
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 28
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    slli a1, a2, 27
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v12, v12, a2
-; RV32-NEXT:    vslidedown.vi v12, v12, 2
-; RV32-NEXT:    vand.vi v12, v12, 1
-; RV32-NEXT:    vmsne.vi v0, v12, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
+; RV32-NEXT:    vand.vi v10, v10, 1
+; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0), v0.t
 ; RV32-NEXT:    vse32.v v8, (a3)
 ; RV32-NEXT:    ret
 ;
@@ -254,31 +248,29 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vle32.v v10, (a1)
-; RV64-NEXT:    andi a0, a2, 1
+; RV64-NEXT:    vle32.v v8, (a1)
+; RV64-NEXT:    andi a1, a2, 1
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v12, v8, a0
-; RV64-NEXT:    slli a0, a2, 62
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 61
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 60
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
-; RV64-NEXT:    slli a0, a2, 59
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    vslide1down.vx v10, v8, a1
+; RV64-NEXT:    slli a1, a2, 62
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 61
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 60
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    slli a1, a2, 59
+; RV64-NEXT:    srli a1, a1, 63
+; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v12, v12, a2
-; RV64-NEXT:    vslidedown.vi v12, v12, 2
-; RV64-NEXT:    vand.vi v12, v12, 1
-; RV64-NEXT:    vmsne.vi v0, v12, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a2
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
+; RV64-NEXT:    vand.vi v10, v10, 1
+; RV64-NEXT:    vmsne.vi v0, v10, 0
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0), v0.t
 ; RV64-NEXT:    vse32.v v8, (a3)
 ; RV64-NEXT:    ret
   %va = load <6 x float>, ptr %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
@@ -155,11 +155,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
@@ -173,10 +170,7 @@
 ; CHECK-LABEL: vmerge_larger_vl_same_passthru:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
@@ -223,10 +217,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
@@ -239,10 +231,8 @@
 define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -991,11 +991,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
@@ -1006,11 +1003,8 @@
 define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_larger_vl_same_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vadd.vv v11, v9, v10
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
@@ -1051,10 +1045,8 @@
 define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
@@ -1065,10 +1057,8 @@
 define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
 ; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)