diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -96,7 +96,8 @@ bool tryCombineZExt(MachineInstr &MI, SmallVectorImpl &DeadInsts, - SmallVectorImpl &UpdatedDefs) { + SmallVectorImpl &UpdatedDefs, + GISelObserverWrapper &Observer) { assert(MI.getOpcode() == TargetOpcode::G_ZEXT); Builder.setInstr(MI); @@ -121,6 +122,18 @@ return true; } + // zext(zext x) -> (zext x) + Register ZextSrc; + if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) { + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(ZextSrc); + Observer.changedInstr(MI); + UpdatedDefs.push_back(DstReg); + markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); + return true; + } + // Try to fold zext(g_constant) when the larger constant type is legal. // Can't use MIPattern because we don't have a specific constant in mind. auto *SrcMI = MRI.getVRegDef(SrcReg); @@ -162,6 +175,21 @@ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } + + // sext(zext x) -> (zext x) + // sext(sext x) -> (sext x) + Register ExtSrc; + MachineInstr *ExtMI; + if (mi_match(SrcReg, MRI, + m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)), + m_GSExt(m_Reg(ExtSrc)))))) { + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); + Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc}); + UpdatedDefs.push_back(DstReg); + markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); } @@ -582,7 +610,7 @@ Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs); break; case TargetOpcode::G_ZEXT: - Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs); + Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); break; case TargetOpcode::G_SEXT: Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs); @@ -657,15 +685,13 @@ } } - /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be - /// dead due to MI being killed, then mark DefMI as dead too. - /// Some of the combines (extends(trunc)), try to walk through redundant - /// copies in between the extends and the truncs, and this attempts to collect - /// the in between copies if they're dead. - void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, - SmallVectorImpl &DeadInsts) { - DeadInsts.push_back(&MI); - + /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI + /// (either by killing it or changing operands) results in DefMI being dead + /// too. In-between COPYs or artifact-casts are also collected if they are + /// dead. + /// MI is not marked dead. + void markDefDead(MachineInstr &MI, MachineInstr &DefMI, + SmallVectorImpl &DeadInsts) { // Collect all the copy instructions that are made dead, due to deleting // this instruction. Collect all of them until the Trunc(DefMI). // Eg, @@ -696,6 +722,17 @@ DeadInsts.push_back(&DefMI); } + /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be + /// dead due to MI being killed, then mark DefMI as dead too. + /// Some of the combines (extends(trunc)), try to walk through redundant + /// copies in between the extends and the truncs, and this attempts to collect + /// the in between copies if they're dead. + void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, + SmallVectorImpl &DeadInsts) { + DeadInsts.push_back(&MI); + markDefDead(MI, DefMI, DeadInsts); + } + /// Erase the dead instructions in the list and call the observer hooks. /// Normally the Legalizer will deal with erasing instructions that have been /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir @@ -35,3 +35,51 @@ %2:_(s64) = G_ZEXT %1 $vgpr0_vgpr1 = COPY %2 ... + +--- +name: test_zext_zext_i32_i48_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zext_zext_i32_i48_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s48) = G_ZEXT %0 + %2:_(s64) = G_ZEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_sext_zext_i32_i48_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_zext_i32_i48_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s48) = G_ZEXT %0 + %2:_(s64) = G_SEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_sext_sext_i32_i48_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_sext_i32_i48_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s48) = G_SEXT %0 + %2:_(s64) = G_SEXT %1 + $vgpr0_vgpr1 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -18,24 +18,16 @@ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32) - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7 + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) ; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -18,24 +18,16 @@ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32) - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7 + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) ; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: $vgpr1 = COPY [[ZEXT]](s32)