diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -323,6 +323,11 @@
   void applyCombineUnmergeConstant(MachineInstr &MI,
                                    SmallVectorImpl<APInt> &Csts);
 
+  /// Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
+  bool
+  matchCombineUnmergeUndef(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Transform X, Y = G_UNMERGE Z -> X = G_TRUNC Z.
   bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
   void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -535,6 +535,14 @@
   (apply [{ Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
 >;
 
+// Fold (unmerge undef) -> undef, undef, ...
+def unmerge_undef : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_UNMERGE_VALUES): $root,
+         [{ return Helper.matchCombineUnmergeUndef(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
 // Transform x,y = unmerge z -> x = trunc z.
 def unmerge_dead_to_trunc : GICombineRule<
   (defs root:$d),
@@ -844,7 +852,8 @@
                                      propagate_undef_any_op,
                                      propagate_undef_all_ops,
                                      propagate_undef_shuffle_mask,
-                                     erase_undef_store]>;
+                                     erase_undef_store,
+                                     unmerge_undef]>;
 
 def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         binop_same_val, binop_left_to_zero,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1748,6 +1748,20 @@
   MI.eraseFromParent();
 }
 
+bool CombinerHelper::matchCombineUnmergeUndef(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  unsigned SrcIdx = MI.getNumOperands() - 1;
+  Register SrcReg = MI.getOperand(SrcIdx).getReg();
+  MatchInfo = [&MI](MachineIRBuilder &B) {
+    unsigned NumElems = MI.getNumOperands() - 1;
+    for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+      Register DstReg = MI.getOperand(Idx).getReg();
+      B.buildUndef(DstReg);
+    }
+  };
+  return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
+}
+
 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
          "Expected an unmerge");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -25,7 +25,6 @@
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 11
 ; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
-; MUBUF-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -38,7 +37,6 @@
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_mov_b32 s32, 0
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 9
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 10
@@ -256,7 +254,6 @@
 ; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
 ; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
-; MUBUF-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -281,7 +278,6 @@
 ; FLATSCR-NEXT:    v_writelane_b32 v40, s33, 2
 ; FLATSCR-NEXT:    s_mov_b32 s33, s32
 ; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 9
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 10
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
@@ -0,0 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: split_unmerge_undef
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: split_unmerge_undef
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-NEXT: {{  $}}
+    %ptr1:_(p1) = COPY $vgpr0_vgpr1
+    %ptr2:_(p1) = COPY $vgpr2_vgpr3
+    %ptr3:_(p1) = COPY $vgpr4_vgpr5
+    %vec:_(<3 x s32>) = G_IMPLICIT_DEF
+    %p1:_(s32), %p2:_(s32), %p3:_(s32) = G_UNMERGE_VALUES %vec
+    G_STORE %p1:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4)
+    G_STORE %p2:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4)
+    G_STORE %p3:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
@@ -11,12 +11,8 @@
     ; GCN-LABEL: name: select_from_different_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
-    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV]], [[UV1]]
-    ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
     ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
     %0:_(<2 x s32>) = G_IMPLICIT_DEF
     %1:_(s32) = COPY $vgpr0
@@ -38,10 +34,8 @@
     ; GCN-LABEL: name: select_from_same_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>)
-    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
-    ; GCN-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
     ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
    %0:_(<2 x s32>) = G_IMPLICIT_DEF
    %1:_(s32) = COPY $vgpr0
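
For illustration, a minimal before/after sketch of the rewrite this combine performs, in generic MIR (the register names %vec, %a, %b are hypothetical and not taken from the patch):

    ; Before: every result of the G_UNMERGE_VALUES reads from an undef source.
    %vec:_(<2 x s32>) = G_IMPLICIT_DEF
    %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %vec

    ; After: the build function returned by matchCombineUnmergeUndef emits one
    ; G_IMPLICIT_DEF per destination register, and the unmerge is erased.
    %a:_(s32) = G_IMPLICIT_DEF
    %b:_(s32) = G_IMPLICIT_DEF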