Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -105,19 +105,23 @@ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); // zext(trunc x) - > and (aext/copy/trunc x), mask + // zext(sext x) -> and (sext x), mask Register TruncSrc; - if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { + Register SextSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) || + mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) { LLT DstTy = MRI.getType(DstReg); if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || isConstantUnsupported(DstTy)) return false; LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); - auto MIBMask = Builder.buildConstant( - DstTy, Mask.zext(DstTy.getScalarSizeInBits())); - Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), - MIBMask); + APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); + auto Mask = Builder.buildConstant( + DstTy, MaskVal.zext(DstTy.getScalarSizeInBits())); + auto Extended = SextSrc ? Builder.buildSExtOrTrunc(DstTy, SextSrc) : + Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); + Builder.buildAnd(DstReg, Extended, Mask); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -117,3 +117,68 @@ %4:_(s128) = G_ZEXT %3 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 ... 
+# zext(sext x) case, s1 -> s8 -> s32: the expected output is a single G_SEXT to s32 ANDed with 255 (all ones of the s8 intermediate type).
+---
+name: test_zext_s8_to_s32_of_sext_s1_to_s8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s1_to_s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
+    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(eq), %0, %1
+    %3:_(s8) = G_SEXT %2 ; inner extension: s1 -> s8
+    %4:_(s32) = G_ZEXT %3 ; outer extension under test: s8 -> s32
+    $vgpr0 = COPY %4
+...
+# zext(sext x) case, s1 -> s16 -> s32: the expected mask is 65535. NOTE(review): the name says zext_s8, but the G_ZEXT input below is s16.
+---
+name: test_zext_s8_to_s32_of_sext_s1_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s1_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
+    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(eq), %0, %1
+    %3:_(s16) = G_SEXT %2 ; inner extension: s1 -> s16
+    %4:_(s32) = G_ZEXT %3 ; outer extension under test: s16 -> s32
+    $vgpr0 = COPY %4
+...
+# zext(sext x) case on a loaded byte, s8 -> s16 -> s32: the expected output is G_SEXT_INREG ..., 8 ANDed with 65535. NOTE(review): the name says zext_s8, but the G_ZEXT input below is s16.
+---
+name: test_zext_s8_to_s32_of_sext_s8_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s8_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1)
+    %2:_(s16) = G_SEXT %1 ; inner extension: s8 -> s16
+    %3:_(s32) = G_ZEXT %2 ; outer extension under test: s16 -> s32
+    $vgpr0 = COPY %3
+...