Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4835,6 +4835,7 @@
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
   case TargetOpcode::G_EXTRACT:
     if (TypeIdx != 1)
       return UnableToLegalize;
@@ -4843,6 +4844,7 @@
     Observer.changedInstr(MI);
     return Legalized;
   case TargetOpcode::G_INSERT:
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
   case TargetOpcode::G_FREEZE:
   case TargetOpcode::G_FNEG:
   case TargetOpcode::G_FABS:
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -131,6 +131,21 @@
   };
 }
 
+// Increase the number of vector elements to reach the next legal RegClass.
+static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    unsigned NumElts = Ty.getNumElements();
+    unsigned EltSize = Ty.getElementType().getSizeInBits();
+
+    // Find the nearest legal RegClass that is larger than the current type.
+    while (!SIRegisterInfo::getSGPRClassForBitWidth(NumElts * EltSize))
+      ++NumElts;
+
+    return std::pair(TypeIdx, LLT::fixed_vector(NumElts, EltSize));
+  };
+}
+
 static LLT getBitcastRegisterType(const LLT Ty) {
   const unsigned Size = Ty.getSizeInBits();
 
@@ -215,6 +230,14 @@
   };
 }
 
+static LegalityPredicate isNotAnExistingRegClassType(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    return Ty.getSizeInBits() < MaxRegisterSize &&
+           !SIRegisterInfo::getSGPRClassForBitWidth(Ty.getSizeInBits());
+  };
+}
+
 static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
     const LLT QueryTy = Query.Types[TypeIdx];
@@ -1450,6 +1473,9 @@
     unsigned IdxTypeIdx = 2;
 
     getActionDefinitionsBuilder(Op)
+        .moreElementsIf(
+            isNotAnExistingRegClassType(VecTypeIdx),
+            moreElementsToNextExistingRegClass(VecTypeIdx))
         .customIf([=](const LegalityQuery &Query) {
           const LLT EltTy = Query.Types[EltTypeIdx];
           const LLT VecTy = Query.Types[VecTypeIdx];
@@ -1538,6 +1564,9 @@
       .legalForCartesianProduct(AllS64Vectors, {S64})
       .clampNumElements(0, V16S32, V32S32)
       .clampNumElements(0, V2S64, V16S64)
+      .moreElementsIf(
+          isNotAnExistingRegClassType(0),
+          moreElementsToNextExistingRegClass(0))
      .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));
 
   if (ST.hasScalarPackInsts()) {
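Note: moreElementsToNextExistingRegClass rounds a vector type up to the smallest
bit width that actually has a register class, rather than to the next power of
two, and isNotAnExistingRegClassType gates the rule so it only fires for types
below MaxRegisterSize that have no exact class. A minimal standalone C++ sketch
of the rounding loop is below; hasRegClass() is a hypothetical stand-in for
SIRegisterInfo::getSGPRClassForBitWidth(), and the width list mirrors the class
sizes in SIRegisterInfo.cpp.

    #include <cstdio>

    // Hypothetical stand-in: true only for bit widths that have a class.
    static bool hasRegClass(unsigned Bits) {
      switch (Bits) {
      case 32: case 64: case 96: case 128: case 160: case 192: case 224:
      case 256: case 288: case 320: case 352: case 384: case 512: case 1024:
        return true;
      default:
        return false;
      }
    }

    int main() {
      // <5 x s64> (320 bits) already matches a class and is left alone;
      // <7 x s64> (448 bits) has no class and widens to <8 x s64> (512 bits).
      for (unsigned NumElts : {5u, 7u}) {
        unsigned EltSize = 64, NewElts = NumElts;
        while (!hasRegClass(NewElts * EltSize)) // same loop as the mutation
          ++NewElts;
        std::printf("<%u x s64> -> <%u x s64>\n", NumElts, NewElts);
      }
      return 0;
    }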
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2501,31 +2501,31 @@
 static const TargetRegisterClass *
 getAnyVGPRClassForBitWidth(unsigned BitWidth) {
-  if (BitWidth <= 64)
+  if (BitWidth == 64)
     return &AMDGPU::VReg_64RegClass;
-  if (BitWidth <= 96)
+  if (BitWidth == 96)
     return &AMDGPU::VReg_96RegClass;
-  if (BitWidth <= 128)
+  if (BitWidth == 128)
     return &AMDGPU::VReg_128RegClass;
-  if (BitWidth <= 160)
+  if (BitWidth == 160)
     return &AMDGPU::VReg_160RegClass;
-  if (BitWidth <= 192)
+  if (BitWidth == 192)
     return &AMDGPU::VReg_192RegClass;
-  if (BitWidth <= 224)
+  if (BitWidth == 224)
     return &AMDGPU::VReg_224RegClass;
-  if (BitWidth <= 256)
+  if (BitWidth == 256)
     return &AMDGPU::VReg_256RegClass;
-  if (BitWidth <= 288)
+  if (BitWidth == 288)
     return &AMDGPU::VReg_288RegClass;
-  if (BitWidth <= 320)
+  if (BitWidth == 320)
     return &AMDGPU::VReg_320RegClass;
-  if (BitWidth <= 352)
+  if (BitWidth == 352)
     return &AMDGPU::VReg_352RegClass;
-  if (BitWidth <= 384)
+  if (BitWidth == 384)
     return &AMDGPU::VReg_384RegClass;
-  if (BitWidth <= 512)
+  if (BitWidth == 512)
     return &AMDGPU::VReg_512RegClass;
-  if (BitWidth <= 1024)
+  if (BitWidth == 1024)
     return &AMDGPU::VReg_1024RegClass;
 
   return nullptr;
@@ -2533,31 +2533,31 @@
 static const TargetRegisterClass *
 getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
-  if (BitWidth <= 64)
+  if (BitWidth == 64)
     return &AMDGPU::VReg_64_Align2RegClass;
-  if (BitWidth <= 96)
+  if (BitWidth == 96)
     return &AMDGPU::VReg_96_Align2RegClass;
-  if (BitWidth <= 128)
+  if (BitWidth == 128)
     return &AMDGPU::VReg_128_Align2RegClass;
-  if (BitWidth <= 160)
+  if (BitWidth == 160)
     return &AMDGPU::VReg_160_Align2RegClass;
-  if (BitWidth <= 192)
+  if (BitWidth == 192)
     return &AMDGPU::VReg_192_Align2RegClass;
-  if (BitWidth <= 224)
+  if (BitWidth == 224)
     return &AMDGPU::VReg_224_Align2RegClass;
-  if (BitWidth <= 256)
+  if (BitWidth == 256)
     return &AMDGPU::VReg_256_Align2RegClass;
-  if (BitWidth <= 288)
+  if (BitWidth == 288)
     return &AMDGPU::VReg_288_Align2RegClass;
-  if (BitWidth <= 320)
+  if (BitWidth == 320)
     return &AMDGPU::VReg_320_Align2RegClass;
-  if (BitWidth <= 352)
+  if (BitWidth == 352)
     return &AMDGPU::VReg_352_Align2RegClass;
-  if (BitWidth <= 384)
+  if (BitWidth == 384)
     return &AMDGPU::VReg_384_Align2RegClass;
-  if (BitWidth <= 512)
+  if (BitWidth == 512)
     return &AMDGPU::VReg_512_Align2RegClass;
-  if (BitWidth <= 1024)
+  if (BitWidth == 1024)
     return &AMDGPU::VReg_1024_Align2RegClass;
 
   return nullptr;
@@ -2567,9 +2567,9 @@
 const TargetRegisterClass *
 SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
   if (BitWidth == 1)
     return &AMDGPU::VReg_1RegClass;
-  if (BitWidth <= 16)
+  if (BitWidth == 16)
     return &AMDGPU::VGPR_LO16RegClass;
-  if (BitWidth <= 32)
+  if (BitWidth == 32)
     return &AMDGPU::VGPR_32RegClass;
   return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
                                 : getAnyVGPRClassForBitWidth(BitWidth);
@@ -2577,31 +2577,31 @@
 static const TargetRegisterClass *
 getAnyAGPRClassForBitWidth(unsigned BitWidth) {
-  if (BitWidth <= 64)
+  if (BitWidth == 64)
     return &AMDGPU::AReg_64RegClass;
-  if (BitWidth <= 96)
+  if (BitWidth == 96)
     return &AMDGPU::AReg_96RegClass;
-  if (BitWidth <= 128)
+  if (BitWidth == 128)
     return &AMDGPU::AReg_128RegClass;
-  if (BitWidth <= 160)
+  if (BitWidth == 160)
     return &AMDGPU::AReg_160RegClass;
-  if (BitWidth <= 192)
+  if (BitWidth == 192)
     return &AMDGPU::AReg_192RegClass;
-  if (BitWidth <= 224)
+  if (BitWidth == 224)
     return &AMDGPU::AReg_224RegClass;
-  if (BitWidth <= 256)
+  if (BitWidth == 256)
     return &AMDGPU::AReg_256RegClass;
-  if (BitWidth <= 288)
+  if (BitWidth == 288)
     return &AMDGPU::AReg_288RegClass;
-  if (BitWidth <= 320)
+  if (BitWidth == 320)
     return &AMDGPU::AReg_320RegClass;
-  if (BitWidth <= 352)
+  if (BitWidth == 352)
     return &AMDGPU::AReg_352RegClass;
-  if (BitWidth <= 384)
+  if (BitWidth == 384)
     return &AMDGPU::AReg_384RegClass;
-  if (BitWidth <= 512)
+  if (BitWidth == 512)
     return &AMDGPU::AReg_512RegClass;
-  if (BitWidth <= 1024)
+  if (BitWidth == 1024)
     return &AMDGPU::AReg_1024RegClass;
 
   return nullptr;
@@ -2609,31 +2609,31 @@
 static const TargetRegisterClass *
 getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
-  if (BitWidth <= 64)
+  if (BitWidth == 64)
     return &AMDGPU::AReg_64_Align2RegClass;
-  if (BitWidth <= 96)
+  if (BitWidth == 96)
     return &AMDGPU::AReg_96_Align2RegClass;
-  if (BitWidth <= 128)
+  if (BitWidth == 128)
     return &AMDGPU::AReg_128_Align2RegClass;
-  if (BitWidth <= 160)
+  if (BitWidth == 160)
     return &AMDGPU::AReg_160_Align2RegClass;
-  if (BitWidth <= 192)
+  if (BitWidth == 192)
     return &AMDGPU::AReg_192_Align2RegClass;
-  if (BitWidth <= 224)
+  if (BitWidth == 224)
     return &AMDGPU::AReg_224_Align2RegClass;
-  if (BitWidth <= 256)
+  if (BitWidth == 256)
     return &AMDGPU::AReg_256_Align2RegClass;
-  if (BitWidth <= 288)
+  if (BitWidth == 288)
     return &AMDGPU::AReg_288_Align2RegClass;
-  if (BitWidth <= 320)
+  if (BitWidth == 320)
     return &AMDGPU::AReg_320_Align2RegClass;
-  if (BitWidth <= 352)
+  if (BitWidth == 352)
     return &AMDGPU::AReg_352_Align2RegClass;
-  if (BitWidth <= 384)
+  if (BitWidth == 384)
     return &AMDGPU::AReg_384_Align2RegClass;
-  if (BitWidth <= 512)
+  if (BitWidth == 512)
     return &AMDGPU::AReg_512_Align2RegClass;
-  if (BitWidth <= 1024)
+  if (BitWidth == 1024)
     return &AMDGPU::AReg_1024_Align2RegClass;
 
   return nullptr;
@@ -2641,9 +2641,9 @@
 const TargetRegisterClass *
 SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
-  if (BitWidth <= 16)
+  if (BitWidth == 16)
     return &AMDGPU::AGPR_LO16RegClass;
-  if (BitWidth <= 32)
+  if (BitWidth == 32)
     return &AMDGPU::AGPR_32RegClass;
   return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
                                 : getAnyAGPRClassForBitWidth(BitWidth);
@@ -2651,31 +2651,31 @@
 static const TargetRegisterClass *
 getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
-  if (BitWidth <= 64)
+  if (BitWidth == 64)
     return &AMDGPU::AV_64RegClass;
-  if (BitWidth <= 96)
+  if (BitWidth == 96)
     return &AMDGPU::AV_96RegClass;
-  if (BitWidth <= 128)
+  if (BitWidth == 128)
     return &AMDGPU::AV_128RegClass;
-  if (BitWidth <= 160)
+  if (BitWidth == 160)
     return &AMDGPU::AV_160RegClass;
-  if (BitWidth <= 192)
+  if (BitWidth == 192)
     return &AMDGPU::AV_192RegClass;
-  if (BitWidth <= 224)
+  if (BitWidth == 224)
     return &AMDGPU::AV_224RegClass;
-  if (BitWidth <= 256)
+  if (BitWidth == 256)
     return &AMDGPU::AV_256RegClass;
-  if (BitWidth <= 288)
+  if (BitWidth == 288)
     return &AMDGPU::AV_288RegClass;
-  if (BitWidth <= 320)
+  if (BitWidth == 320)
     return &AMDGPU::AV_320RegClass;
-  if (BitWidth <= 352)
+  if (BitWidth == 352)
     return &AMDGPU::AV_352RegClass;
-  if (BitWidth <= 384)
+  if (BitWidth == 384)
     return &AMDGPU::AV_384RegClass;
-  if (BitWidth <= 512)
+  if (BitWidth == 512)
     return &AMDGPU::AV_512RegClass;
-  if (BitWidth <= 1024)
+  if (BitWidth == 1024)
     return &AMDGPU::AV_1024RegClass;
 
   return nullptr;
@@ -2683,31 +2683,31 @@
 static const TargetRegisterClass *
 getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
-  if (BitWidth <= 64)
+  if (BitWidth == 64)
     return &AMDGPU::AV_64_Align2RegClass;
-  if (BitWidth <= 96)
+  if (BitWidth == 96)
     return &AMDGPU::AV_96_Align2RegClass;
-  if (BitWidth <= 128)
+  if (BitWidth == 128)
     return &AMDGPU::AV_128_Align2RegClass;
-  if (BitWidth <= 160)
+  if (BitWidth == 160)
     return &AMDGPU::AV_160_Align2RegClass;
-  if (BitWidth <= 192)
+  if (BitWidth == 192)
     return &AMDGPU::AV_192_Align2RegClass;
-  if (BitWidth <= 224)
+  if (BitWidth == 224)
     return &AMDGPU::AV_224_Align2RegClass;
-  if (BitWidth <= 256)
+  if (BitWidth == 256)
     return &AMDGPU::AV_256_Align2RegClass;
-  if (BitWidth <= 288)
+  if (BitWidth == 288)
     return &AMDGPU::AV_288_Align2RegClass;
-  if (BitWidth <= 320)
+  if (BitWidth == 320)
     return &AMDGPU::AV_320_Align2RegClass;
-  if (BitWidth <= 352)
+  if (BitWidth == 352)
     return &AMDGPU::AV_352_Align2RegClass;
-  if (BitWidth <= 384)
+  if (BitWidth == 384)
     return &AMDGPU::AV_384_Align2RegClass;
-  if (BitWidth <= 512)
+  if (BitWidth == 512)
     return &AMDGPU::AV_512_Align2RegClass;
-  if (BitWidth <= 1024)
+  if (BitWidth == 1024)
     return &AMDGPU::AV_1024_Align2RegClass;
 
   return nullptr;
@@ -2715,9 +2715,9 @@
 const TargetRegisterClass *
 SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
-  if (BitWidth <= 16)
+  if (BitWidth == 16)
     return &AMDGPU::VGPR_LO16RegClass;
-  if (BitWidth <= 32)
+  if (BitWidth == 32)
     return &AMDGPU::AV_32RegClass;
   return ST.needsAlignedVGPRs() ? getAlignedVectorSuperClassForBitWidth(BitWidth)
                                 : getAnyVectorSuperClassForBitWidth(BitWidth);
@@ -2726,35 +2726,35 @@
 const TargetRegisterClass *
 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
-  if (BitWidth <= 16)
+  if (BitWidth == 16)
     return &AMDGPU::SGPR_LO16RegClass;
-  if (BitWidth <= 32)
+  if (BitWidth == 32)
     return &AMDGPU::SReg_32RegClass;
-  if (BitWidth <= 64)
+  if (BitWidth == 64)
     return &AMDGPU::SReg_64RegClass;
-  if (BitWidth <= 96)
+  if (BitWidth == 96)
     return &AMDGPU::SGPR_96RegClass;
-  if (BitWidth <= 128)
+  if (BitWidth == 128)
     return &AMDGPU::SGPR_128RegClass;
-  if (BitWidth <= 160)
+  if (BitWidth == 160)
     return &AMDGPU::SGPR_160RegClass;
-  if (BitWidth <= 192)
+  if (BitWidth == 192)
     return &AMDGPU::SGPR_192RegClass;
-  if (BitWidth <= 224)
+  if (BitWidth == 224)
     return &AMDGPU::SGPR_224RegClass;
-  if (BitWidth <= 256)
+  if (BitWidth == 256)
     return &AMDGPU::SGPR_256RegClass;
-  if (BitWidth <= 288)
+  if (BitWidth == 288)
     return &AMDGPU::SGPR_288RegClass;
-  if (BitWidth <= 320)
+  if (BitWidth == 320)
     return &AMDGPU::SGPR_320RegClass;
-  if (BitWidth <= 352)
+  if (BitWidth == 352)
     return &AMDGPU::SGPR_352RegClass;
-  if (BitWidth <= 384)
+  if (BitWidth == 384)
     return &AMDGPU::SGPR_384RegClass;
-  if (BitWidth <= 512)
+  if (BitWidth == 512)
     return &AMDGPU::SGPR_512RegClass;
-  if (BitWidth <= 1024)
+  if (BitWidth == 1024)
     return &AMDGPU::SGPR_1024RegClass;
 
   return nullptr;
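Note: every getXClassForBitWidth helper above switches from a round-up lookup
(<=) to an exact-width match (==). An odd-sized value no longer silently lands
in an oversized class here; it is expected to have been widened to an existing
class by the new moreElementsIf rule before register-class selection, and an
unexpected width now yields nullptr. A small sketch of the behavioral change,
with integers standing in for the real TargetRegisterClass pointers:

    #include <cstdio>

    // Bit widths that have a register class in SIRegisterInfo.cpp.
    static const unsigned Widths[] = {32,  64,  96,  128, 160, 192, 224,
                                      256, 288, 320, 352, 384, 512, 1024};

    static unsigned lookupRoundUp(unsigned Bits) { // old: first class that fits
      for (unsigned W : Widths)
        if (Bits <= W)
          return W;
      return 0; // no class, like returning nullptr
    }

    static unsigned lookupExact(unsigned Bits) { // new: exact match only
      for (unsigned W : Widths)
        if (Bits == W)
          return W;
      return 0;
    }

    int main() {
      // A 448-bit request (e.g. <7 x s64>) used to land in a 512-bit class;
      // now it gets no class and must be widened by the legalizer first.
      std::printf("448 bits: old -> %u, new -> %u\n", lookupRoundUp(448),
                  lookupExact(448)); // prints: 448 bits: old -> 512, new -> 0
      return 0;
    }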
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
@@ -7,13 +7,11 @@
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 1
-; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; CHECK-NEXT: v_bfe_i32 v2, v1, 0, 1
-; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v2
+; CHECK-NEXT: v_mov_b32_e32 v1, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
+; CHECK-NEXT: v_mov_b32_e32 v3, v2
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %arg, zeroinitializer
   %sext = sext <2 x i1> %cmp to <2 x i64>
@@ -29,10 +27,8 @@
 ; CHECK-NEXT: v_mov_b32_e32 v3, 0
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; CHECK-NEXT: v_and_b32_e32 v2, 1, v1
 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %arg, zeroinitializer
   %sext = zext <2 x i1> %cmp to <2 x i64>
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
@@ -1412,12 +1412,10 @@
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]]
   ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]]
-  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
-  ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 1
-  ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
-  ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT1]], 1
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
-  ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+  ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[ICMP]](s1)
+  ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[ICMP1]](s1)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
+  ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT1]](s64)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -1453,13 +1451,10 @@
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]]
   ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]]
-  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
-  ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
-  ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
-  ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]]
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-  ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
+  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP1]](s1)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
+  ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT1]](s64)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -1536,15 +1531,12 @@
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]]
   ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
   ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]]
-  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
-  ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 1
-  ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
-  ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT1]], 1
-  ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP2]](s1)
-  ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT2]], 1
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
-  ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
-  ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG2]](s64)
+  ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[ICMP]](s1)
+  ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[ICMP1]](s1)
+  ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[ICMP2]](s1)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
+  ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT1]](s64)
+  ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT2]](s64)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -207,13 +207,11 @@
   ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]]
   ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
-  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-  ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
   ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
-  ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
-  ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
-  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+  ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+  ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32)
   ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
   ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -239,13 +237,11 @@
   ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]]
   ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
-  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-  ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
-  ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
-  ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
-  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+  ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+  ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32)
   ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
   ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -2768,8 +2768,13 @@
 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v13, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v14, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
+; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr14 killed $exec
+; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr15 killed $exec
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
 ; GCN-NEXT: v_readfirstlane_b32 s0, v0
 ; GCN-NEXT: v_readfirstlane_b32 s1, v1
 ; GCN-NEXT: ; return to shader part epilog
@@ -2808,8 +2813,11 @@
 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
 ; GFX10-NEXT: ; return to shader part epilog
@@ -2847,8 +2855,11 @@
 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
 ; GFX11-NEXT: ; return to shader part epilog
@@ -2879,6 +2890,9 @@
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14
 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v14
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: dyn_extract_v7f64_v_v:
@@ -2903,6 +2917,9 @@
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: dyn_extract_v7f64_v_v:
@@ -2921,6 +2938,8 @@
 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 entry:
   %ext = extractelement <7 x double> %vec, i32 %sel
@@ -3422,42 +3441,82 @@
 ; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000
 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v12, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_v:
-; GFX10PLUS: ; %bb.0: ; %entry
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41700000, vcc_lo
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: dyn_extract_v15f32_const_s_v:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: dyn_extract_v15f32_const_s_v:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
 entry:
   %ext = extractelement <15 x float> , i32 %sel
   ret float %ext
@@ -3557,7 +3616,9 @@
 ; GCN-NEXT: v_mov_b32_e32 v15, s16
 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GCN-NEXT: ; return to shader part epilog
 ;
 ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v:
@@ -3590,7 +3651,9 @@
 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s15, vcc_lo
 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s16, vcc_lo
+; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
+; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
 ; GFX10PLUS-NEXT: ; return to shader part epilog
 entry:
   %ext = extractelement <15 x float> %vec, i32 %sel
@@ -3629,41 +3692,81 @@
 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: dyn_extract_v15f32_v_v:
-; GFX10PLUS: ; %bb.0: ; %entry
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: dyn_extract_v15f32_v_v:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: dyn_extract_v15f32_v_v:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
 entry:
   %ext = extractelement <15 x float> %vec, i32 %sel
   ret float %ext
@@ -3825,6 +3928,8 @@
 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
+; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
 ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
 ;
 ; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3:
@@ -3859,42 +3964,83 @@
 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc
 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15
 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15
+; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
 ; MOVREL-NEXT: s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: dyn_extract_v15f32_v_v_offset3:
-; GFX10PLUS: ; %bb.0: ; %entry
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_add_nc_u32_e32 v15, 3, v15
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
-; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
 entry:
   %add = add i32 %sel, 3
   %ext = extractelement <15 x float> %vec, i32 %add
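Note: the extractelement test updates above follow directly from the widening:
a <7 x double> or <15 x float> source is now legalized as an 8- or 16-element
vector, so the compare/select chain that lowers a dynamic index gains one more
step, the v_cmp against index 7 or 15 that selects the (undefined) padding
element. A rough C++ sketch of the chain shape, with plain compares and
selects standing in for the v_cmp_eq_u32/v_cndmask pairs:

    #include <cstdio>

    int main() {
      // <7 x double> widened to 8 elements; element 7 is undefined padding.
      double Vec[8] = {10, 11, 12, 13, 14, 15, 16, /*padding*/ 0};
      unsigned Sel = 3;
      double Result = Vec[0];
      // One v_cmp + v_cndmask pair per element, now 1..7 instead of 1..6.
      for (unsigned I = 1; I < 8; ++I)
        Result = (Sel == I) ? Vec[I] : Result;
      std::printf("%g\n", Result); // prints 13
      return 0;
    }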
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -5670,6 +5670,10 @@
 ; GPRIDX-NEXT: s_mov_b32 s12, s14
 ; GPRIDX-NEXT: s_mov_b32 s13, s15
 ; GPRIDX-NEXT: v_mov_b32_e32 v18, s15
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s0
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s1
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s14
 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s13
 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s12
@@ -5683,43 +5687,39 @@
 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s4
 ; GPRIDX-NEXT: v_mov_b32_e32 v6, s3
 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s2
-; GPRIDX-NEXT: v_mov_b32_e32 v4, s1
-; GPRIDX-NEXT: v_mov_b32_e32 v3, s0
-; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
-; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v5, v0, s[10:11]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v0, s[0:1]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v0, s[2:3]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v0, s[4:5]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v0, s[6:7]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v15, v0, s[8:9]
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
-; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v1, s[10:11]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v14, v1, s[6:7]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v16, v1, s[8:9]
 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3
-; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4
-; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
-; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6
-; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5
-; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8
-; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7
-; GPRIDX-NEXT: v_readfirstlane_b32 s7, v10
-; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9
-; GPRIDX-NEXT: v_readfirstlane_b32 s9, v12
-; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11
-; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13
-; GPRIDX-NEXT: v_readfirstlane_b32 s12, v0
-; GPRIDX-NEXT: v_readfirstlane_b32 s13, v1
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
+; GPRIDX-NEXT: v_readfirstlane_b32 s2, v3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v7, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v8, v1, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v9, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v10, v1, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v2
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v11, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v12, v1, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v2
+; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v13, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v14, v1, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v2
+; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v15, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v16, v1, vcc
+; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3
+; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4
+; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6
+; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7
+; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8
+; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9
+; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10
+; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11
+; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12
+; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: dyn_insertelement_v7f64_s_v_v:
@@ -5739,9 +5739,13 @@
 ; GFX10-NEXT: s_mov_b32 s12, s14
 ; GFX10-NEXT: s_mov_b32 s13, s15
 ; GFX10-NEXT: v_mov_b32_e32 v18, s15
+; GFX10-NEXT: v_mov_b32_e32 v3, s0
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
+; GFX10-NEXT: v_mov_b32_e32 v4, s1
 ; GFX10-NEXT: v_mov_b32_e32 v17, s14
 ; GFX10-NEXT: v_mov_b32_e32 v16, s13
 ; GFX10-NEXT: v_mov_b32_e32 v15, s12
+; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo
 ; GFX10-NEXT: v_mov_b32_e32 v14, s11
 ; GFX10-NEXT: v_mov_b32_e32 v13, s10
 ; GFX10-NEXT: v_mov_b32_e32 v12, s9
@@ -5752,43 +5756,39 @@
 ; GFX10-NEXT: v_mov_b32_e32 v7, s4
 ; GFX10-NEXT: v_mov_b32_e32 v6, s3
 ; GFX10-NEXT: v_mov_b32_e32 v5, s2
-; GFX10-NEXT: v_mov_b32_e32 v4, s1
-; GFX10-NEXT: v_mov_b32_e32 v3, s0
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
-; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 6, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo
+; GFX10-NEXT: v_readfirstlane_b32 s0, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX10-NEXT: v_readfirstlane_b32 s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc_lo
+; GFX10-NEXT: v_readfirstlane_b32 s2, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo
+; GFX10-NEXT: v_readfirstlane_b32 s3, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v2
+; GFX10-NEXT: v_readfirstlane_b32 s4, v4
+; GFX10-NEXT: v_readfirstlane_b32 s5, v5
+; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v1, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v2
-; GFX10-NEXT: v_readfirstlane_b32 s2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v12, v1, vcc_lo
-; GFX10-NEXT: v_readfirstlane_b32 s3, v6
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v13, v0, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v14, v1, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v15, v0, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v16, v1, s1
-; GFX10-NEXT: v_readfirstlane_b32 s0, v3
-; GFX10-NEXT: v_readfirstlane_b32 s1, v4
-; GFX10-NEXT: v_readfirstlane_b32 s4, v7
-; GFX10-NEXT: v_readfirstlane_b32 s5, v8
-; GFX10-NEXT: v_readfirstlane_b32 s6, v9
-; GFX10-NEXT: v_readfirstlane_b32 s7, v10
-; GFX10-NEXT: v_readfirstlane_b32 s8, v11
-; GFX10-NEXT: v_readfirstlane_b32 s9, v2
-; GFX10-NEXT: v_readfirstlane_b32 s10, v12
-; GFX10-NEXT: v_readfirstlane_b32 s11, v13
-; GFX10-NEXT: v_readfirstlane_b32 s12, v0
-; GFX10-NEXT: v_readfirstlane_b32 s13, v1
+; GFX10-NEXT: v_readfirstlane_b32 s6, v6
+; GFX10-NEXT: v_readfirstlane_b32 s7, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v8, v11, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, v12, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v2
+; GFX10-NEXT: v_readfirstlane_b32 s8, v8
+; GFX10-NEXT: v_readfirstlane_b32 s9, v9
+; GFX10-NEXT: v_cndmask_b32_e32 v10, v13, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, v14, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2
+; GFX10-NEXT: v_readfirstlane_b32 s10, v10
+; GFX10-NEXT: v_readfirstlane_b32 s11, v11
+; GFX10-NEXT: v_cndmask_b32_e32 v12, v15, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v13, v16, v1, vcc_lo
+; GFX10-NEXT: v_readfirstlane_b32 s12, v12
+; GFX10-NEXT: v_readfirstlane_b32 s13, v13
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v7f64_s_v_v:
@@ -5808,45 +5808,45 @@
 ; GFX11-NEXT: s_mov_b32 s12, s14
 ; GFX11-NEXT: s_mov_b32 s13, s15
 ; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14
+; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
 ; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12
 ; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10
 ; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
 ; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
 ; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
 ; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
-; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2
-; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 6, v2
-; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
+; GFX11-NEXT: v_dual_cndmask_b32 v18, v3, v0 :: v_dual_cndmask_b32 v17, v4, v1
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0
-; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0
-; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v2
-; GFX11-NEXT: v_readfirstlane_b32 s2, v5
-; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v2, v12, v1
-; GFX11-NEXT: v_readfirstlane_b32 s3, v6
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v13, v0, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v14, v1, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v15, v0, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v16, v1, s1
-; GFX11-NEXT: v_readfirstlane_b32 s0, v3
-; GFX11-NEXT: v_readfirstlane_b32 s1, v4
-; GFX11-NEXT: v_readfirstlane_b32 s4, v7
-; GFX11-NEXT: v_readfirstlane_b32 s5, v8
-; GFX11-NEXT: v_readfirstlane_b32 s6, v9
-; GFX11-NEXT: v_readfirstlane_b32 s7, v10
+; GFX11-NEXT: v_cmp_eq_u32_e64 s6, 4, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, v0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v6, v1, s0
+; GFX11-NEXT: v_cmp_eq_u32_e64 s9, 6, v2
+; GFX11-NEXT: v_dual_cndmask_b32 v6, v7, v0 :: v_dual_cndmask_b32 v5, v8, v1
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v0, s6
+; GFX11-NEXT: v_readfirstlane_b32 s0, v18
+; GFX11-NEXT: v_readfirstlane_b32 s1, v17
+; GFX11-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-NEXT: v_dual_cndmask_b32 v7, v9, v0 :: v_dual_cndmask_b32 v8, v10, v1
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v12, v1, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v15, v0, s9
+; GFX11-NEXT: v_readfirstlane_b32 s3, v4
+; GFX11-NEXT: v_readfirstlane_b32 s4, v6
+; GFX11-NEXT: v_cndmask_b32_e32 v10, v13, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v14, v14, v1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v16, v1, s9
+; GFX11-NEXT: v_readfirstlane_b32 s5, v5
+; GFX11-NEXT: v_readfirstlane_b32 s6, v7
+; GFX11-NEXT: v_readfirstlane_b32 s7, v8
 ; GFX11-NEXT: v_readfirstlane_b32 s8, v11
-; GFX11-NEXT: v_readfirstlane_b32 s9, v2
-; GFX11-NEXT: v_readfirstlane_b32 s10, v12
-; GFX11-NEXT: v_readfirstlane_b32 s11, v13
-; GFX11-NEXT: v_readfirstlane_b32 s12, v0
-; GFX11-NEXT: v_readfirstlane_b32 s13, v1
+; GFX11-NEXT: v_readfirstlane_b32 s9, v9
+; GFX11-NEXT: v_readfirstlane_b32 s10, v10
+; GFX11-NEXT: v_readfirstlane_b32 s11, v14
+; GFX11-NEXT: v_readfirstlane_b32 s12, v12
+; GFX11-NEXT: v_readfirstlane_b32 s13, v13
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
   %insert = insertelement <7 x double> %vec, double %val, i32 %idx
@@ -5908,26 +5908,26 @@
 ; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_v:
 ; GPRIDX: ; %bb.0: ; %entry
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v16
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 2, v16
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 3, v16
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 4, v16
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 5, v16
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 6, v16
 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
-; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v14, s[0:1]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v14, s[2:3]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v14, s[6:7]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v14, s[8:9]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[10:11]
 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
-; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[0:1]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v15, s[2:3]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[6:7]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v15, s[10:11]
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v15, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v14, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v15, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
+; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
+; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v14, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v15, vcc
 ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0
 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1
 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
@@ -5947,38 +5947,38 @@
 ; GFX10-LABEL: dyn_insertelement_v7f64_v_v_v:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v16
-; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 2, v16
-; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 3, v16
-; GFX10-NEXT: v_cmp_eq_u32_e64 s3, 4, v16
-; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 5, v16
-; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 6, v16
 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v14, s0
v_cndmask_b32_e64 v4, v4, v14, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v14, s3 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v14, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v14, s5 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v15, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v15, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v15, s3 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v15, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v15, s5 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v15, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 ; GFX10-NEXT: v_readfirstlane_b32 s2, v2 ; GFX10-NEXT: v_readfirstlane_b32 s3, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 ; GFX10-NEXT: v_readfirstlane_b32 s4, v4 ; GFX10-NEXT: v_readfirstlane_b32 s5, v5 +; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 ; GFX10-NEXT: v_readfirstlane_b32 s6, v6 ; GFX10-NEXT: v_readfirstlane_b32 s7, v7 +; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v15, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 ; GFX10-NEXT: v_readfirstlane_b32 s8, v8 ; GFX10-NEXT: v_readfirstlane_b32 s9, v9 +; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 ; GFX10-NEXT: v_readfirstlane_b32 s10, v10 ; GFX10-NEXT: v_readfirstlane_b32 s11, v11 +; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v15, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s12, v12 ; GFX10-NEXT: v_readfirstlane_b32 s13, v13 ; GFX10-NEXT: ; return to shader part epilog @@ -5986,14 +5986,14 @@ ; GFX11-LABEL: dyn_insertelement_v7f64_v_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v16 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 6, v16 +; GFX11-NEXT: v_cmp_eq_u32_e64 s9, 5, v16 +; GFX11-NEXT: v_cmp_eq_u32_e64 s10, 6, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 -; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v14, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v15, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v14, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v15, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v14, s9 +; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v15, s9 +; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v14, s10 +; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v15, s10 ; GFX11-NEXT: v_dual_cndmask_b32 v2, v2, v14 :: v_dual_cndmask_b32 v3, v3, v15 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir @@ -207,12 +207,12 @@ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; GFX9-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x s16>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -301,14 +301,14 @@ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x s16>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -417,8 +417,27 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[SHL3]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = COPY $vgpr3 %2:_(<3 x s8>) = G_TRUNC %0 @@ -799,8 +818,29 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C3]](s32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST2]](<2 x s32>), [[LSHR]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[C4]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = COPY $vgpr3 %2:_(<3 x s16>) = G_TRUNC %0 @@ -820,9 +860,26 @@ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s16>) = G_TRUNC %0 @@ -843,8 +900,24 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 1 %2:_(<3 x s16>) = G_TRUNC %0 @@ -865,8 +938,25 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 2 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1426,9 +1516,24 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[SHL3]](s32) 
+ ; CHECK-NEXT: $vgpr0 = COPY [[LSHR2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s24) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -240,12 +240,9 @@ ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_icmp_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 @@ -255,12 +252,9 @@ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_icmp_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 @@ -270,12 +264,9 @@ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = G_CONSTANT i32 0 %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0 @@ -301,14 +292,10 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = 
G_ICMP intpred(ne), [[UV]](s32), [[UV3]] ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX8-LABEL: name: test_icmp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 @@ -320,14 +307,10 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_icmp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 @@ -339,14 +322,10 @@ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 @@ -373,16 +352,11 @@ ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] ; GFX7-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX7-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX7-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; GFX7-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP3]](s1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) ; GFX8-LABEL: name: test_icmp_v4s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 @@ -396,16 +370,11 @@ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP3]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_icmp_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 @@ -419,16 +388,11 @@ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) 
- ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP3]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(p1) = G_IMPLICIT_DEF %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load (<4 x s32>)) @@ -708,11 +672,9 @@ ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX7-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_icmp_v2p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -723,11 +685,9 @@ ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_icmp_v2p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -738,11 +698,9 @@ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: 
[[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr0_vgpr1 @@ -765,11 +723,9 @@ ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(p999), [[UV3:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY1]](<2 x p999>) ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] - ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX7-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX7-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_icmp_v2p999 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 @@ -780,11 +736,9 @@ ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(p999), [[UV3:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY1]](<2 x p999>) ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_icmp_v2p999 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 @@ -795,11 +749,9 @@ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(p999), [[UV3:%[0-9]+]]:_(p999) = G_UNMERGE_VALUES [[COPY1]](<2 x p999>) ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; 
GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -1681,12 +1681,9 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[PHI1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir @@ -51,12 +51,9 @@ ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[UV2]], [[UV4]], [[ICMP]] ; CHECK-NEXT: [[SADDE2:%[0-9]+]]:_(s32), [[SADDE3:%[0-9]+]]:_(s1) = G_SADDE [[UV3]], [[UV5]], [[ICMP1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDE]](s32), [[SADDE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SADDE1]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SADDE3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -170,12 +170,9 @@ ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT 
[[XOR1]](s1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -237,9 +234,6 @@ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) @@ -262,11 +256,10 @@ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[XOR2]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -357,16 +350,11 @@ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = 
G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[XOR2]](s1) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[XOR3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -402,12 +390,9 @@ ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV7]](s32), [[C]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -1461,10 +1461,10 @@ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP3]](s1) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP5]](s1) ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C]] + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) @@ -1518,12 +1518,12 @@ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP5]](s1) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP8]](s1) ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C]] + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C]] + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C]] ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -307,29 +307,68 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV4]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST3]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV8]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x s16>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST5]](<2 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV12]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS2]](<4 x s16>) + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST7]](<2 x s32>) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV16]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS3]](<4 x s16>) + ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST9]](<2 x s32>) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV18]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C2]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS4]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir 
=================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -972,53 +972,92 @@ ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST3]](<2 x s32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x s16>) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST5]](<2 x s32>) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[COPY4]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST6]](<2 x s32>) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x s16>) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST7]](<2 x s32>) + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C2]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<4 x s16>) ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST2]](<2 x s32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST3]](<2 x s32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST4]](<2 x s32>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<4 x s16>) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST5]](<2 x s32>) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir @@ -65,11 +65,9 @@ ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH1]](s32), [[ASHR1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX9-LABEL: name: test_smulo_v2s32 @@ -89,11 +87,9 @@ ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32) ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SMULH1]](s32), [[ASHR1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -223,16 +219,14 @@ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 - ; GFX8-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG6]](s32), [[SEXT_INREG7]](s32) + ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; GFX8-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG8]](s32), [[SEXT_INREG9]](s32) + ; GFX8-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX8-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG6]](s32), [[SEXT_INREG7]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_smulo_v2s16 @@ -252,14 +246,12 @@ ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]] ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 16 ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 
- ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG6]](s32), [[SEXT_INREG7]](s32) - ; GFX9-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 - ; GFX9-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 16 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG8]](s32), [[SEXT_INREG9]](s32) + ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) + ; GFX9-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL]], 16 + ; GFX9-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 16 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG6]](s32), [[SEXT_INREG7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir @@ -51,12 +51,9 @@ ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[UV2]], [[UV4]], [[ICMP]] ; CHECK-NEXT: [[SSUBE2:%[0-9]+]]:_(s32), [[SSUBE3:%[0-9]+]]:_(s1) = G_SSUBE [[UV3]], [[UV5]], [[ICMP1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBE]](s32), [[SSUBE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SSUBE1]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SSUBE3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -170,12 +170,9 @@ ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -237,9 +234,6 @@ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) @@ -262,11 +256,10 @@ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[XOR2]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -357,16 +350,11 @@ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[XOR2]](s1) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[XOR3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = 
COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -402,12 +390,9 @@ ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV7]](s32), [[C]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[XOR1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir @@ -51,12 +51,9 @@ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV4]], [[ICMP]] ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV5]], [[ICMP1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDE]](s32), [[UADDE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDE1]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDE3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -149,12 +149,9 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -200,9 +197,6 @@ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[AND7]] ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND8]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) @@ -224,11 +218,10 @@ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -297,16 +290,11 @@ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; 
CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -333,12 +321,9 @@ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDO]](s32), [[UADDO2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO3]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -61,12 +61,9 @@ ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX9-LABEL: name: test_umulo_v2s32 @@ -84,12 +81,9 @@ ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-NEXT: $vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -283,12 +277,9 @@ ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] - ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT10]](s64), [[ZEXT11]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) ; GFX9-LABEL: name: test_umulo_v2s64 @@ -368,12 +359,9 @@ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] - ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT10]](s64), [[ZEXT11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -563,17 +551,14 @@ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32) + ; 
GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umulo_v2s16 @@ -594,15 +579,12 @@ ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir @@ -51,12 +51,9 @@ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV4]], [[ICMP]] ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[ICMP1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBE]](s32), [[USUBE2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBE1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBE3]](s1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBE1]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBE3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -149,12 +149,9 @@ ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -200,9 +197,6 @@ ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND6]], [[AND7]] ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND8]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) @@ -224,11 +218,10 @@ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -297,16 +290,11 @@ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = 
G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -333,12 +321,9 @@ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] ; CHECK-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBO]](s32), [[USUBO2]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBO1]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBO3]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBO1]](s1) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBO3]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
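The recurring pattern in the overflow-test updates above (legalize-smulo, -ssube, -ssubo, -uadde, -uaddo, -umulo, -usube, -usubo) is the lowering of the s1 carry/overflow bit: the old expectations any-extended the s1 result and then normalized it with G_SEXT_INREG ..., 1 (signed) or a G_AND against a materialized constant 1 (unsigned); the new expectations use a single G_SEXT or G_ZEXT of the s1 value. A minimal MIR sketch of the two equivalences, with hypothetical register names (%flag standing in for any s1 def such as a G_ICMP or a carry-out):

    ; signed case, before: any-extend, then sign-extend in-register from bit 0
    %a:_(s32) = G_ANYEXT %flag(s1)
    %sext:_(s32) = G_SEXT_INREG %a, 1
    ; signed case, after: a single sign extension
    %sext:_(s32) = G_SEXT %flag(s1)

    ; unsigned case, before: any-extend, then mask to the low bit
    %b:_(s32) = G_ANYEXT %flag(s1)
    %one:_(s32) = G_CONSTANT i32 1
    %zext:_(s32) = G_AND %b, %one
    ; unsigned case, after: a single zero extension
    %zext:_(s32) = G_ZEXT %flag(s1)

Besides being shorter, the direct form avoids materializing the i32 1 mask constant that the old checks carried in every affected function.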