diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
@@ -148,3 +148,105 @@
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %vl)
   ret <vscale x 2 x i32> %b
 }
+
+; Tests for folding vmerge into its ops when their VLs differ
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vadd.vv v11, v9, v10, v0.t
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 2)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_same_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vadd.vv v11, v9, v10, v0.t
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 3)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_different_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v10, v11, v0.t
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 2)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can't fold this because we need to take elements from both %pt1 and %pt2
+define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_different_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v10, v11, v0.t
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 3)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 2)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 3)
+  ret <vscale x 2 x i32> %b
+}
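Note (reviewer illustration, not part of the patch): the file above exercises vmerge feeding from an already-masked op. Per the test comments, the fold is expected to be legal when both instructions share a passthru, or when the op's passthru is poison, using the smaller of the two VLs: up to that VL the masked op already computes the lanes the vmerge would select, and a tail-undisturbed policy keeps the remaining lanes equal to the shared passthru. A hypothetical folded form of @vmerge_smaller_vl_same_passthru is sketched below; the name @folded_masked_sketch is invented for illustration and is not output produced by this patch.

define <vscale x 2 x i32> @folded_masked_sketch(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
  ; Illustrative assumption: the same masked add, clamped to the vmerge's
  ; VL of 2. Policy 0 (tail/mask undisturbed) keeps inactive and tail lanes
  ; equal to %passthru, matching what the vmerge produced.
  %b = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
  ret <vscale x 2 x i32> %b
}
declare <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64, i64)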
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -966,3 +966,96 @@
 declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)
 declare <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i64)
 declare void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>* nocapture, i64)
+
+; Tests for folding vmerge into its ops when their VLs differ
+
+declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)
+declare <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64)
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vadd.vv v11, v9, v10
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_same_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vadd.vv v11, v9, v10
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_different_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v11
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can't fold this because we need to take elements from both %pt1 and %pt2
+define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_different_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v11
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
+  ret <vscale x 2 x i32> %b
+}
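Note (reviewer illustration, not part of the patch): the can't-fold case above is where the two VLs genuinely conflict. With the vadd at VL=2 (tail undisturbed over %pt1) and the vmerge at VL=3, the result is r[i] = m[i] ? (x[i] + y[i]) : pt2[i] for i < 2, but r[2] = m[2] ? pt1[2] : pt2[2], because element 2 of %a is the undisturbed tail of %pt1. A folded add has a single passthru operand and therefore cannot source element 2 from both %pt1 and %pt2, so the peephole must leave this case alone.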
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
+  ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
+  ret <vscale x 2 x i32> %b
+}
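Note (reviewer illustration, not part of the patch): in this second file the add is unmasked, so the expected fold additionally turns the vmerge's selector into the op's mask, again at the smaller VL. Sketched at the IR level below for @vmerge_smaller_vl_same_passthru; the actual peephole operates on SelectionDAG nodes, and the name @folded_unmasked_sketch is invented for illustration.

define <vscale x 2 x i32> @folded_unmasked_sketch(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
  ; Illustrative assumption: the vmerge selected %a where %m is set and
  ; %passthru elsewhere, so a masked, tail/mask-undisturbed (policy 0) add
  ; at VL=2 computes exactly those lanes in one instruction.
  %b = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
  ret <vscale x 2 x i32> %b
}
declare <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64, i64)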