Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -293,6 +293,10 @@ /// type that's wider than the given size. LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size); +/// True iff the specified type index is a scalar whose size is not a multiple +/// of 32. +LegalityPredicate sizeNotMultiple32(unsigned TypeIdx); + /// True iff the specified type index is a scalar whose size is not a power of /// 2. LegalityPredicate sizeNotPow2(unsigned TypeIdx); @@ -348,6 +352,10 @@ /// next power of 2. LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0); +/// Widen the scalar type or vector element type for the given type index to the +/// next multiple of 32. +LegalizeMutation widenScalarOrEltToNextMultiple32(unsigned TypeIdx, unsigned Min = 0); + /// Add more elements to the type for the given type index to the next power of /// 2. LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0); @@ -828,6 +836,14 @@ LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize)); } + LegalizeRuleSet &widenScalarToNextMultiple32(unsigned TypeIdx, + unsigned MinSize = 0) { + using namespace LegalityPredicates; + return actionIf( + LegalizeAction::WidenScalar, sizeNotMultiple32(typeIdx(TypeIdx)), + LegalizeMutations::widenScalarOrEltToNextMultiple32(TypeIdx, MinSize)); + } + /// Widen the scalar or vector element type to the next power of two that is /// at least MinSize. No effect if the scalar size is a power of two. 
LegalizeRuleSet &widenScalarOrEltToNextPow2(unsigned TypeIdx, Index: llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -153,6 +153,13 @@ }; } +LegalityPredicate LegalityPredicates::sizeNotMultiple32(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isScalar() && (QueryTy.getSizeInBits() % 32); + }; +} + LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; Index: llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -63,6 +63,16 @@ }; } +LegalizeMutation LegalizeMutations::widenScalarOrEltToNextMultiple32(unsigned TypeIdx, + unsigned Min) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + unsigned NewEltSizeInBits = + std::max((Ty.getScalarSizeInBits() + 31) / 32 * 32, Min); + return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits)); + }; +} + LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -532,10 +532,10 @@ // Full set of gfx9 features. 
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32, S16, V2S16}) + .widenScalarToNextMultiple32(0, 32) .clampScalar(0, S16, S32) .clampMaxNumElements(0, S16, 2) - .scalarize(0) - .widenScalarToNextPow2(0, 32); + .scalarize(0); getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) .legalFor({S32, S16, V2S16}) // Clamp modifier @@ -547,9 +547,9 @@ } else if (ST.has16BitInsts()) { getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32, S16}) + .widenScalarToNextMultiple32(0, 32) .clampScalar(0, S16, S32) - .scalarize(0) - .widenScalarToNextPow2(0, 32); // FIXME: min should be 16 + .scalarize(0); // Technically the saturating operations require clamp bit support, but this // was introduced at the same time as 16-bit operations. @@ -569,6 +569,7 @@ } else { getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32}) + .widenScalarToNextMultiple32(0, 32) .clampScalar(0, S32, S32) .scalarize(0); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -59,7 +59,7 @@ ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 6, v0 ; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 ; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 @@ -91,7 +91,7 @@ ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, 6, v0 ; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX9-NEXT: v_and_b32_e32 v1, s3, v1 ; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0 @@ -123,7 +123,7 @@ ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 ; GFX10-NEXT: 
v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, 6, v0 ; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 ; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0 @@ -190,7 +190,7 @@ ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 6, v2 ; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 ; GFX8-NEXT: v_and_b32_e32 v2, v3, v4 @@ -221,7 +221,7 @@ ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 +; GFX9-NEXT: v_sub_u32_e32 v3, 6, v2 ; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 ; GFX9-NEXT: v_and_b32_e32 v2, v3, v4 @@ -253,7 +253,7 @@ ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f -; GFX10-NEXT: v_sub_nc_u16 v4, 6, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, 6, v2 ; GFX10-NEXT: v_and_b32_e32 v2, v2, v3 ; GFX10-NEXT: v_and_b32_e32 v3, v4, v3 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -58,7 +58,7 @@ ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 6, v0 ; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 @@ -89,7 +89,7 @@ ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; 
GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, 6, v0 ; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX9-NEXT: v_and_b32_e32 v1, s3, v1 ; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0 @@ -120,7 +120,7 @@ ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, 6, v0 ; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 ; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 @@ -187,7 +187,7 @@ ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 6, v2 ; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 ; GFX8-NEXT: v_and_b32_e32 v3, v3, v4 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 @@ -218,7 +218,7 @@ ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 +; GFX9-NEXT: v_sub_u32_e32 v3, 6, v2 ; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 ; GFX9-NEXT: v_and_b32_e32 v3, v3, v4 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 @@ -250,7 +250,7 @@ ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f -; GFX10-NEXT: v_sub_nc_u16 v4, 6, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, 6, v2 ; GFX10-NEXT: v_and_b32_e32 v2, v2, v3 ; GFX10-NEXT: v_and_b32_e32 v4, v4, v3 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -387,22 +387,16 @@ ; GFX8-LABEL: name: test_add_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; GFX8: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_add_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -523,23 +523,11 @@ ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD 
[[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<4 x s8>) = G_BITCAST %0 @@ -558,26 +546,22 @@ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: 
[[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV4]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV5]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV6]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV7]] ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -625,39 +609,15 @@ ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32) ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], 
[[C6]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT 
[[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR1]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR12]], [[LSHR12]] + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[LSHR13]], [[LSHR13]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<8 x s4>) = G_BITCAST %0 @@ -676,52 +636,44 @@ ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK: 
[[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[TRUNC13]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[TRUNC15]] + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV8]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV9]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV10]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV11]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV12]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV13]] + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV14]] + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV15]] ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[ADD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND2]], [[C2]](s16) ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[ADD4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[ADD5]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ADD6]], [[C]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[ADD6]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C2]](s16) ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ADD7]], [[C]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[ADD7]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16) ; CHECK: 
[[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -822,41 +774,17 @@ ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: 
[[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC8]](<8 x s8>) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR1]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV1]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<8 x s8>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_BITCAST %0 %2:_(<8 x s8>) = G_ADD %1, %1 @@ -988,108 +916,60 @@ ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C4]](s32) ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], 
[[COPY12]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR1]] + ; CHECK: 
[[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[LSHR6]] + ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV1]] + ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[LSHR7]] + ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[LSHR12]], [[LSHR12]] + ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[LSHR13]], [[LSHR13]] ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C7]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]] + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C7]] + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C7]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C7]] + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C7]] + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C7]] ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C7]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C7]] + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C7]] + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD5]], [[C7]] ; CHECK: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32) ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C7]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C7]] + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C7]] + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD7]], [[C7]] ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C7]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C7]] + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD8]], [[C7]] + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD9]], [[C7]] ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C3]](s32) ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C7]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C7]] + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD10]], [[C7]] + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ADD11]], [[C7]] ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT12]], [[C7]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT13]], [[C7]] + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ADD12]], [[C7]] + ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ADD13]], [[C7]] ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C3]](s32) ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT14]], 
[[C7]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT15]], [[C7]] + ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ADD14]], [[C7]] + ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ADD15]], [[C7]] ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) @@ -1210,23 +1090,27 @@ ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[UV]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = 
G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[ANYEXT]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[LSHR]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[ANYEXT1]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[UV1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[ANYEXT2]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[LSHR1]] + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32) + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[ANYEXT3]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) ; CHECK: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; CHECK: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) %0:_(s64) = COPY $vgpr0_vgpr1 @@ -1257,57 +1141,21 @@ ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] 
- ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; 
CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[BUILD_VECTOR]](<12 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC12]](<12 x s8>) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR1]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV1]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV2]] + ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[LSHR6]] + ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[LSHR7]] + ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), 
[[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[BUILD_VECTOR]](<12 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<12 x s8>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<12 x s8>) = G_BITCAST %0 %2:_(<12 x s8>) = G_ADD %1, %1 @@ -1383,36 +1231,30 @@ ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV6]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) 
= G_ADD [[UV1]], [[UV7]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV8]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV9]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV10]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV11]] ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[ADD4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[ADD5]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF @@ -1529,39 +1371,47 @@ ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR 
[[TRUNC6]], [[C1]](s16) ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: 
[[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[UV2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[ANYEXT]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[LSHR]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[ANYEXT1]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[UV3]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[ANYEXT2]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[LSHR1]] + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32) + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[ANYEXT3]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[UV4]] + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = 
COPY [[ANYEXT4]](s32) + ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[COPY10]], [[ANYEXT4]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY11]], [[LSHR6]] + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ANYEXT5]](s32) + ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[COPY12]], [[ANYEXT5]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[COPY13]], [[UV5]] + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[ANYEXT6]](s32) + ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[COPY14]], [[ANYEXT6]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[COPY15]], [[LSHR7]] + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[ANYEXT7]](s32) + ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[COPY16]], [[ANYEXT7]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 @@ -1680,71 +1530,23 @@ ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY 
[[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC 
[[UV3]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), 
[[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR1]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV1]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV2]] + ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[LSHR6]] + ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[LSHR7]] + ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV3]] + ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 @@ -1852,39 +1654,47 @@ ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; 
CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR6]], [[LSHR6]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR7]], [[LSHR7]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), 
[[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[COPY1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[COPY3]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT1]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[COPY5]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[COPY7]] + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32) + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT3]], [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[BITCAST2]], [[COPY9]] + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ANYEXT4]](s32) + ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT4]], [[COPY10]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[COPY11]] + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) 
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ANYEXT5]](s32) + ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT5]], [[COPY12]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[BITCAST3]], [[COPY13]] + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[ANYEXT6]](s32) + ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT6]], [[COPY14]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[COPY15]] + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[ANYEXT7]](s32) + ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT7]], [[COPY16]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 @@ -2345,40 +2155,44 @@ ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[COPY1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[COPY3]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT1]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[COPY5]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[COPY7]] + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32) + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT3]], [[COPY8]] ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C2]] ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C2]] + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD5]], [[C2]] ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C2]] + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD7]], [[C2]] ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -500,21 +500,58 @@ $vgpr0 = COPY %5 ... 
-# FIXME: -# --- -# name: test_mul_s33 -# body: | -# bb.0: -# liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - -# %0:_(s64) = COPY $vgpr0_vgpr1 -# %1:_(s64) = COPY $vgpr2_vgpr3 -# %2:_(s33) = G_TRUNC %0 -# %3:_(s33) = G_TRUNC %1 -# %4:_(s33) = G_MUL %2, %3 -# %5:_(s64) = G_ANYEXT %4 -# $vgpr0_vgpr1 = COPY %5 -# ... +--- +name: test_mul_s33 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX6-LABEL: name: test_mul_s33 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-LABEL: name: test_mul_s33 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-LABEL: 
name: test_mul_s33 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s33) = G_TRUNC %0 + %3:_(s33) = G_TRUNC %1 + %4:_(s33) = G_MUL %2, %3 + %5:_(s64) = G_ANYEXT %4 + $vgpr0_vgpr1 = COPY %5 +... --- name: test_mul_s96 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -385,19 +385,13 @@ ; GFX8-LABEL: name: test_sub_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[SUB]](s32) ; GFX9-LABEL: name: test_sub_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY1]](s32) - ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[SUB]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -276,6 +276,57 @@ ret <2 x i32> %result } +define amdgpu_cs i33 @s_mul_i33(i33 inreg %num, i33 inreg %den) { +; GFX7-LABEL: s_mul_i33: +; GFX7: ; %bb.0: +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX7-NEXT: s_mul_i32 s4, s0, s2 +; GFX7-NEXT: s_mul_i32 s1, s1, s2 +; GFX7-NEXT: s_mul_i32 s0, s0, s3 +; GFX7-NEXT: s_add_i32 s1, s1, s0 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, s1, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v0 +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_mul_i33: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX8-NEXT: s_mul_i32 s4, s0, s2 +; GFX8-NEXT: s_mul_i32 s1, s1, s2 +; GFX8-NEXT: s_mul_i32 s0, s0, s3 +; GFX8-NEXT: s_add_i32 s1, s1, s0 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s1, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 +; GFX8-NEXT: s_mov_b32 s0, s4 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_mul_i33: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mul_i32 s1, s1, s2 +; GFX9-NEXT: s_mul_i32 s3, s0, s3 +; GFX9-NEXT: s_mul_i32 s4, s0, s2 +; GFX9-NEXT: s_mul_hi_u32 s0, s0, s2 +; GFX9-NEXT: s_add_i32 s1, s1, s3 +; GFX9-NEXT: s_add_i32 s1, s1, s0 +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_mul_i33: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mul_i32 s1, s1, s2 +; GFX10-NEXT: s_mul_i32 s3, s0, s3 +; GFX10-NEXT: 
s_mul_hi_u32 s4, s0, s2 +; GFX10-NEXT: s_add_i32 s1, s1, s3 +; GFX10-NEXT: s_mul_i32 s0, s0, s2 +; GFX10-NEXT: s_add_i32 s1, s1, s4 +; GFX10-NEXT: ; return to shader part epilog + %result = mul i33 %num, %den + ret i33 %result +} + define amdgpu_ps i64 @s_mul_i64(i64 inreg %num, i64 inreg %den) { ; GFX7-LABEL: s_mul_i64: ; GFX7: ; %bb.0: