diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -113,14 +113,23 @@ LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); - auto MIBMask = Builder.buildConstant( - DstTy, Mask.zext(DstTy.getScalarSizeInBits())); + auto MIBMask = + Builder.buildConstant(DstTy, Mask.zext(DstTy.getScalarSizeInBits())); Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBMask); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } + // zext(zext x) -> (zext x) + Register ZextSrc; + if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) { + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); + Builder.buildZExt(DstReg, ZextSrc); + markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); + return true; + } + // Try to fold zext(g_constant) when the larger constant type is legal. // Can't use MIPattern because we don't have a specific constant in mind. auto *SrcMI = MRI.getVRegDef(SrcReg); @@ -162,6 +171,20 @@ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } + + // sext(zext x) -> (zext x) + // sext(sext x) -> (sext x) + Register ExtSrc; + MachineInstr *ExtMI; + if (mi_match(SrcReg, MRI, + m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)), + m_GSExt(m_Reg(ExtSrc)))))) { + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); + Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc}); + markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir @@ -35,3 +35,51 @@ %2:_(s64) = G_ZEXT %1 $vgpr0_vgpr1 = COPY %2 ... + +--- +name: test_zext_zext_i32_i48_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zext_zext_i32_i48_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s48) = G_ZEXT %0 + %2:_(s64) = G_ZEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_sext_zext_i32_i48_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_zext_i32_i48_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s48) = G_ZEXT %0 + %2:_(s64) = G_SEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_sext_sext_i32_i48_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_sext_i32_i48_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s48) = G_SEXT %0 + %2:_(s64) = G_SEXT %1 + $vgpr0_vgpr1 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -18,24 +18,16 @@ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32) - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7 + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) ; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -18,24 +18,16 @@ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32) - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7 + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) ; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: $vgpr1 = COPY [[ZEXT]](s32)