Index: include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h =================================================================== --- include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -80,11 +80,11 @@ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLT DstTy = MRI.getType(DstReg); if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || - isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) + isConstantUnsupported(DstTy)) return false; LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - APInt Mask = APInt::getAllOnesValue(SrcTy.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue()); Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBMask); @@ -112,11 +112,11 @@ // applicable. if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) || isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) || - isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) + isConstantUnsupported(DstTy)) return false; LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - unsigned ShAmt = DstTy.getSizeInBits() - SrcTy.getSizeInBits(); + unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt); auto MIBShl = Builder.buildInstr( TargetOpcode::G_SHL, {DstTy}, @@ -151,7 +151,7 @@ } else { // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT. - if (isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) + if (isConstantUnsupported(DstTy)) return false; LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;); Builder.buildConstant(DstReg, 0); @@ -403,6 +403,15 @@ return Step.Action == Unsupported || Step.Action == NotFound; } + bool isConstantUnsupported(LLT Ty) const { + if (!Ty.isVector()) + return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}}); + + LLT EltTy = Ty.getElementType(); + return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) || + isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}); + } + /// Looks through copy instructions and returns the actual /// source register. unsigned lookThroughCopyInstrs(unsigned Reg) { Index: test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# FIXME: Remove -global-isel-abort=0 when G_TRUNC legality handled + +--- +name: test_anyext_trunc_v2s32_to_v2s16_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[COPY]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY1]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_ANYEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_anyext_trunc_v2s32_to_v2s16_to_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s16_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_TRUNC %0 + %2:_(<2 x s64>) = G_ANYEXT %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: test_anyext_trunc_v2s32_to_v2s8_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s8_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[TRUNC]](<2 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s8>) = G_TRUNC %0 + %2:_(<2 x s16>) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_anyext_trunc_v3s32_to_v3s16_to_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_anyext_trunc_v3s32_to_v3s16_to_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(<3 x s32>) = G_ANYEXT %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + Index: test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir @@ -0,0 +1,100 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# FIXME: Remove -global-isel-abort=0 when G_TRUNC legality handled + +--- +name: test_sext_trunc_v2s32_to_v2s16_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_SEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_sext_trunc_v2s32_to_v2s16_to_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s16_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[TRUNC]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[TRUNC1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_TRUNC %0 + %2:_(<2 x s64>) = G_SEXT %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: test_sext_trunc_v2s32_to_v2s8_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s8_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[TRUNC]], [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: $vgpr0 = COPY [[ASHR]](<2 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s8>) = G_TRUNC %0 + %2:_(<2 x s16>) = G_SEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_sext_trunc_v3s32_to_v3s16_to_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_sext_trunc_v3s32_to_v3s16_to_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32) + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(<3 x s32>) = G_SEXT %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + Index: test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# FIXME: Remove -global-isel-abort=0 when G_TRUNC legality handled + +--- +name: test_zext_trunc_v2s32_to_v2s16_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[COPY]](<2 x s32>) + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] + ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_ZEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_zext_trunc_v2s32_to_v2s16_to_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s16_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_TRUNC %0 + %2:_(<2 x s64>) = G_ZEXT %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: test_zext_trunc_v2s32_to_v2s8_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s8_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BUILD_VECTOR]] + ; CHECK: $vgpr0 = COPY [[AND]](<2 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s8>) = G_TRUNC %0 + %2:_(<2 x s16>) = G_ZEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_zext_trunc_v3s32_to_v3s16_to_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_zext_trunc_v3s32_to_v3s16_to_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(<3 x s32>) = G_ZEXT %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... +