diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3231,6 +3231,7 @@
   case ISD::VSELECT:
   case ISD::SELECT:
   case ISD::VP_SELECT:
+  case ISD::VP_MERGE:
     Res = WidenVecRes_Select(N);
     break;
   case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
@@ -4782,7 +4783,7 @@
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
   SDValue InOp2 = GetWidenedVector(N->getOperand(2));
   assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
-  return Opcode == ISD::VP_SELECT
+  return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
              ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
                            N->getOperand(3))
              : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -78,6 +78,43 @@
   ret <4 x i8> %v
 }
 
+declare <6 x i8> @llvm.vp.merge.v6i8(<6 x i1>, <6 x i8>, <6 x i8>, i32)
+
+define <6 x i8> @vpmerge_vv_v6i8(<6 x i8> %va, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vx_v6i8(i8 %a, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <6 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vi_v6i8(<6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <6 x i8> poison, i8 2, i32 0
+  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
 declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
 
 define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
@@ -981,10 +1018,10 @@
 ; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    bltu a2, a3, .LBB72_2
+; RV32-NEXT:    bltu a2, a3, .LBB75_2
 ; RV32-NEXT:    # %bb.1:
 ; RV32-NEXT:    mv a1, a3
-; RV32-NEXT:    .LBB72_2:
+; RV32-NEXT:    .LBB75_2:
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV32-NEXT:    vslidedown.vi v0, v1, 2
@@ -1001,10 +1038,10 @@
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT:    bltu a2, a0, .LBB72_4
+; RV32-NEXT:    bltu a2, a0, .LBB75_4
 ; RV32-NEXT:    # %bb.3:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    .LBB72_4:
+; RV32-NEXT:    .LBB75_4:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    addi a0, sp, 16
@@ -1037,10 +1074,10 @@
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    bltu a2, a3, .LBB72_2
+; RV64-NEXT:    bltu a2, a3, .LBB75_2
 ; RV64-NEXT:    # %bb.1:
 ; RV64-NEXT:    mv a1, a3
-; RV64-NEXT:    .LBB72_2:
+; RV64-NEXT:    .LBB75_2:
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV64-NEXT:    vslidedown.vi v0, v1, 2
@@ -1049,10 +1086,10 @@
 ; RV64-NEXT:    addi a1, sp, 16
 ; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT:    bltu a2, a0, .LBB72_4
+; RV64-NEXT:    bltu a2, a0, .LBB75_4
 ; RV64-NEXT:    # %bb.3:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    .LBB72_4:
+; RV64-NEXT:    .LBB75_4:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v1
 ; RV64-NEXT:    csrr a0, vlenb
@@ -1077,19 +1114,19 @@
 ; CHECK-NEXT:    addi a2, a0, -16
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bltu a0, a2, .LBB73_2
+; CHECK-NEXT:    bltu a0, a2, .LBB76_2
 ; CHECK-NEXT:    # %bb.1:
 ; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    .LBB73_2:
+; CHECK-NEXT:    .LBB76_2:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v0, v24, 2
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    vfmerge.vfm v16, v16, fa0, v0
-; CHECK-NEXT:    bltu a0, a1, .LBB73_4
+; CHECK-NEXT:    bltu a0, a1, .LBB76_4
 ; CHECK-NEXT:    # %bb.3:
 ; CHECK-NEXT:    li a0, 16
-; CHECK-NEXT:    .LBB73_4:
+; CHECK-NEXT:    .LBB76_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -78,6 +78,43 @@
   ret <vscale x 2 x i8> %v
 }
 
+declare <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1>, <vscale x 3 x i8>, <vscale x 3 x i8>, i32)
+
+define <vscale x 3 x i8> @vpmerge_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vpmerge_vx_nxv3i8(i8 %a, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vpmerge_vi_nxv3i8(<vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
 declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
 
 define <vscale x 4 x i8> @vpmerge_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -295,10 +332,10 @@
 ; RV32-NEXT:    addi a2, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    bltu a3, a4, .LBB21_2
+; RV32-NEXT:    bltu a3, a4, .LBB24_2
 ; RV32-NEXT:    # %bb.1:
 ; RV32-NEXT:    mv a2, a4
-; RV32-NEXT:    .LBB21_2:
+; RV32-NEXT:    .LBB24_2:
 ; RV32-NEXT:    vl8r.v v8, (a0)
 ; RV32-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v2
@@ -313,10 +350,10 @@
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT:    bltu a3, a1, .LBB21_4
+; RV32-NEXT:    bltu a3, a1, .LBB24_4
 ; RV32-NEXT:    # %bb.3:
 ; RV32-NEXT:    mv a3, a1
-; RV32-NEXT:    .LBB21_4:
+; RV32-NEXT:    .LBB24_4:
 ; RV32-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    addi a0, sp, 16
@@ -347,18 +384,18 @@
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    li a2, 0
-; RV64-NEXT:    bltu a3, a4, .LBB21_2
+; RV64-NEXT:    bltu a3, a4, .LBB24_2
 ; RV64-NEXT:    # %bb.1:
 ; RV64-NEXT:    mv a2, a4
-; RV64-NEXT:    .LBB21_2:
+; RV64-NEXT:    .LBB24_2:
 ; RV64-NEXT:    vl8r.v v8, (a0)
 ; RV64-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT:    bltu a3, a1, .LBB21_4
+; RV64-NEXT:    bltu a3, a1, .LBB24_4
 ; RV64-NEXT:    # %bb.3:
 ; RV64-NEXT:    mv a3, a1
-; RV64-NEXT:    .LBB21_4:
+; RV64-NEXT:    .LBB24_4:
 ; RV64-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v1
 ; RV64-NEXT:    addi a0, sp, 16
@@ -380,20 +417,20 @@
 ; CHECK-NEXT:    csrr a3, vlenb
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    bltu a2, a3, .LBB22_2
+; CHECK-NEXT:    bltu a2, a3, .LBB25_2
 ; CHECK-NEXT:    # %bb.1:
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:    .LBB22_2:
+; CHECK-NEXT:    .LBB25_2:
 ; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a1)
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
 ; CHECK-NEXT:    sub a1, a2, a3
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    bltu a2, a1, .LBB22_4
+; CHECK-NEXT:    bltu a2, a1, .LBB25_4
 ; CHECK-NEXT:    # %bb.3:
 ; CHECK-NEXT:    mv a5, a1
-; CHECK-NEXT:    .LBB22_4:
+; CHECK-NEXT:    .LBB25_4:
 ; CHECK-NEXT:    vsetvli zero, a5, e8, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vxm v16, v16, a0, v0
@@ -410,20 +447,20 @@
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:    bltu a1, a2, .LBB23_2
+; CHECK-NEXT:    bltu a1, a2, .LBB26_2
 ; CHECK-NEXT:    # %bb.1:
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    .LBB23_2:
+; CHECK-NEXT:    .LBB26_2:
 ; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    vsetvli a5, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; CHECK-NEXT:    sub a0, a1, a2
 ; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
-; CHECK-NEXT:    bltu a1, a0, .LBB23_4
+; CHECK-NEXT:    bltu a1, a0, .LBB26_4
 ; CHECK-NEXT:    # %bb.3:
 ; CHECK-NEXT:    mv a4, a0
-; CHECK-NEXT:    .LBB23_4:
+; CHECK-NEXT:    .LBB26_4:
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vim v16, v16, 2, v0