diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -649,6 +649,17 @@ case TargetOpcode::G_UNMERGE_VALUES: Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs, WrapperObserver); break; + case TargetOpcode::G_MERGE_VALUES: + // If any of the users of this merge are an unmerge, then add them to the + // artifact worklist in case there's folding that can be done looking up. + for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) { + if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES || + U.getOpcode() == TargetOpcode::G_TRUNC) { + UpdatedDefs.push_back(MI.getOperand(0).getReg()); + break; + } + } + break; case TargetOpcode::G_EXTRACT: Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs); break; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir @@ -81,15 +81,17 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64) - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s192) - ; CHECK: G_STORE [[UV]](s64), [[COPY1]](p0) :: (store 8) + ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store 8) + ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store 8) + ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store 8) ; CHECK: RET_ReallyLR %0:_(s64) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s192) = G_ZEXT %0(s64) %3:_(s64), %4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %2(s192) G_STORE %3, %1(p0) :: (store 8) + G_STORE %4, %1(p0) :: (store 8) + G_STORE %5, %1(p0) :: (store 8) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir @@ -91,3 +91,32 @@ $x2 = COPY %5(s64) $x3 = COPY %6(s64) ... +--- +name: unmerge_merge_combine +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: unmerge_merge_combine + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128) + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[UV]] + ; CHECK: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[C]], [[UV]] + ; CHECK: [[MUL2:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[UV1]] + ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[UV]] + ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[MUL1]], [[MUL2]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD]], [[UMULH]] + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MUL]](s64), [[ADD1]](s64) + ; CHECK: $q0 = COPY [[MV]](s128) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(s128) = G_ZEXTLOAD %0:_(p0) :: (load 8) + %2:_(s128) = G_LOAD %0:_(p0) :: (load 16) + %3:_(s128) = G_MUL %1:_, %2:_ + $q0 = COPY %3 + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll @@ -176,39 +176,39 @@ ; GFX9-LABEL: zextload_global_i32_to_i128: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-NEXT: v_mov_b32_e32 v2, v0 -; GFX9-NEXT: global_load_dword v0, v[2:3], off -; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 0 +; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, s5 +; GFX9-NEXT: v_mov_b32_e32 v3, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: zextload_global_i32_to_i128: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v3, v1 -; GFX8-NEXT: v_mov_b32_e32 v2, v0 -; GFX8-NEXT: flat_load_dword v0, v[2:3] -; GFX8-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_mov_b32 s4, 0 +; GFX8-NEXT: s_mov_b32 s5, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-NEXT: v_mov_b32_e32 v3, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, s5 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-LABEL: zextload_global_i32_to_i128: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v3, v1 -; GFX6-NEXT: v_mov_b32_e32 v2, v0 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b64 s[4:5], 0 -; GFX6-NEXT: buffer_load_dword v0, v[2:3], s[4:7], 0 addr64 -; GFX6-NEXT: v_mov_b32_e32 v2, 0 +; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-NEXT: s_mov_b32 s8, 0 +; GFX6-NEXT: s_mov_b32 s4, 0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0 -; GFX6-NEXT: v_mov_b32_e32 v3, 0 +; GFX6-NEXT: v_mov_b32_e32 v2, s4 +; GFX6-NEXT: v_mov_b32_e32 v3, s8 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] %load = load i32, i32 addrspace(1)* %ptr diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir @@ -42,10 +42,8 @@ ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[CTLZ1]] - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) + ; MIPS32: $v0 = COPY [[SELECT]](s32) + ; MIPS32: $v1 = COPY [[C]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir @@ -85,10 +85,8 @@ ; MIPS32: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C7]](s32) ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[LSHR3]] ; MIPS32: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD4]](s32), [[C8]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) + ; MIPS32: $v0 = COPY [[ADD4]](s32) + ; MIPS32: $v1 = COPY [[C8]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir @@ -57,10 +57,8 @@ ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]] - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) + ; MIPS32: $v0 = COPY [[SELECT]](s32) + ; MIPS32: $v1 = COPY [[C]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 @@ -142,25 +140,23 @@ ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]] - ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C1]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[C]] + ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]] ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]] ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[AND3]] - ; MIPS32: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32) + ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32) ; MIPS32: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]] ; MIPS32: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]] ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]] ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]] ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV2]] - ; MIPS32: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64) - ; MIPS32: $v0 = COPY [[UV2]](s32) - ; MIPS32: $v1 = COPY [[UV3]](s32) + ; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV1]] + ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64) + ; MIPS32: $v0 = COPY [[UV]](s32) + ; MIPS32: $v1 = COPY [[UV1]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir @@ -109,10 +109,8 @@ ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.px) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[C]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) + ; MIPS32: $v0 = COPY [[LOAD]](s32) + ; MIPS32: $v1 = COPY [[C]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 4 from %ir.px) @@ -219,10 +217,8 @@ ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[LOAD]], [[C]](s32) - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[ASHR]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) + ; MIPS32: $v0 = COPY [[LOAD]](s32) + ; MIPS32: $v1 = COPY [[ASHR]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 4 from %ir.px) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zext_and_sext.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zext_and_sext.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zext_and_sext.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zext_and_sext.mir @@ -18,10 +18,8 @@ ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) + ; MIPS32: $v0 = COPY [[COPY]](s32) + ; MIPS32: $v1 = COPY [[C]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(s32) = COPY $a0 %1:_(s64) = G_ZEXT %0(s32) @@ -45,10 +43,8 @@ ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) + ; MIPS32: $v0 = COPY [[COPY]](s32) + ; MIPS32: $v1 = COPY [[ASHR]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(s32) = COPY $a0 %1:_(s64) = G_SEXT %0(s32)