diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -540,7 +540,7 @@
     std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
   }
 
-  if (Opcode != ISD::VP_SELECT) {
+  if (Opcode != ISD::VP_SELECT && Opcode != ISD::VP_MERGE) {
     Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL);
     Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH);
     return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -915,6 +915,7 @@
   case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
   case ISD::VSELECT:
   case ISD::SELECT:
+  case ISD::VP_MERGE:
   case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break;
   case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
@@ -951,3 +951,151 @@
   %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
   ret <16 x double> %v
 }
+
+declare <32 x double> @llvm.vp.merge.v32f64(<32 x i1>, <32 x double>, <32 x double>, i32)
+
+define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_v32f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 24
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: addi a1, a0, 128
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT: vle64.v v24, (a1)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, a2, -16
+; RV32-NEXT: vmv1r.v v1, v0
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: bltu a2, a3, .LBB72_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, a3
+; RV32-NEXT: .LBB72_2:
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT: vslidedown.vi v0, v1, 2
+; RV32-NEXT: vsetvli zero, a1, e64, m8, tu, mu
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
+; RV32-NEXT: bltu a2, a0, .LBB72_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: .LBB72_4:
+; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpmerge_vv_v32f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: addi a1, a0, 128
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT: vle64.v v24, (a1)
+; RV64-NEXT: addi a3, a2, -16
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v1, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: bltu a2, a3, .LBB72_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a3
+; RV64-NEXT: .LBB72_2:
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT: vslidedown.vi v0, v1, 2
+; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu
+; RV64-NEXT: li a0, 16
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmerge.vvm v24, v24, v16, v0
+; RV64-NEXT: bltu a2, a0, .LBB72_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: .LBB72_4:
+; RV64-NEXT: vsetvli zero, a2, e64, m8, tu, mu
+; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT: vmv8r.v v16, v24
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v32f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -16
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bltu a0, a2, .LBB73_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB73_2:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT: vslidedown.vi v0, v24, 2
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0
+; CHECK-NEXT: bltu a0, a1, .LBB73_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: .LBB73_4:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <32 x double> poison, double %a, i32 0
+  %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer
+  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
+  ret <32 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
@@ -263,6 +263,177 @@
   ret <vscale x 64 x i8> %v
 }
 
+declare <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1>, <vscale x 128 x i8>, <vscale x 128 x i8>, i32)
+
+define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_nxv128i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a4, a0, a1
+; RV32-NEXT: vl8r.v v24, (a4)
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 3
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli a4, zero, e8, m8, ta, mu
+; RV32-NEXT: vlm.v v2, (a2)
+; RV32-NEXT: sub a4, a3, a1
+; RV32-NEXT: vmv1r.v v1, v0
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 4
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: bltu a3, a4, .LBB21_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: .LBB21_2:
+; RV32-NEXT: vl8r.v v8, (a0)
+; RV32-NEXT: vsetvli zero, a2, e8, m8, tu, mu
+; RV32-NEXT: vmv1r.v v0, v2
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
+; RV32-NEXT: bltu a3, a1, .LBB21_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: .LBB21_4:
+; RV32-NEXT: vsetvli zero, a3, e8, m8, tu, mu
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpmerge_vv_nxv128i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a4, a0, a1
+; RV64-NEXT: vl8r.v v24, (a4)
+; RV64-NEXT: vsetvli a4, zero, e8, m8, ta, mu
+; RV64-NEXT: vlm.v v2, (a2)
+; RV64-NEXT: sub a4, a3, a1
+; RV64-NEXT: vmv1r.v v1, v0
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: bltu a3, a4, .LBB21_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, a4
+; RV64-NEXT: .LBB21_2:
+; RV64-NEXT: vl8r.v v8, (a0)
+; RV64-NEXT: vsetvli zero, a2, e8, m8, tu, mu
+; RV64-NEXT: vmv1r.v v0, v2
+; RV64-NEXT: vmerge.vvm v24, v24, v16, v0
+; RV64-NEXT: bltu a3, a1, .LBB21_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a3, a1
+; RV64-NEXT: .LBB21_4:
+; RV64-NEXT: vsetvli zero, a3, e8, m8, tu, mu
+; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT: vmv8r.v v16, v24
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+  %v = call <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1> %m, <vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vpmerge_vx_nxv128i8(i8 %a, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bltu a2, a3, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: li a5, 0
+; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, mu
+; CHECK-NEXT: vlm.v v24, (a1)
+; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, mu
+; CHECK-NEXT: sub a1, a2, a3
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: bltu a2, a1, .LBB22_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a5, a1
+; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: vsetvli zero, a5, e8, m8, tu, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmerge.vxm v16, v16, a0, v0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1> %m, <vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vpmerge_vi_nxv128i8(<vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: bltu a1, a2, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: vsetvli a5, zero, e8, m8, ta, mu
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, mu
+; CHECK-NEXT: sub a0, a1, a2
+; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT: bltu a1, a0, .LBB23_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a4, a0
+; CHECK-NEXT: .LBB23_4:
+; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmerge.vim v16, v16, 2, v0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1> %m, <vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
 declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
 
 define <vscale x 1 x i16> @vpmerge_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
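For context on the LegalizeTypesGeneric.cpp change above: with ISD::VP_MERGE excluded from the early return, SplitRes_Select continues into the VP path, where the explicit vector length (operand 3 of VP_SELECT/VP_MERGE) is split alongside the condition and data operands so each half only operates on its own lanes. The snippet below is a rough sketch of that continuation, not part of the patch; the DAG.SplitEVL helper name follows the surrounding legalizer code but is reproduced from memory and should be checked against the actual sources.

  // Sketch: VP_SELECT and VP_MERGE fall through to here. Split the EVL
  // operand so the low and high halves each see the lane count that
  // actually belongs to them.
  SDValue EVLLo, EVLHi;
  std::tie(EVLLo, EVLHi) =
      DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);

  Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL, EVLLo);
  Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH, EVLHi);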