Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h =================================================================== --- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -203,6 +203,10 @@ LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); + LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2785,6 +2785,65 @@ return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "not a vector type index"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = DstTy.getElementType(); + + int DstNumElts = DstTy.getNumElements(); + int NarrowNumElts = NarrowTy.getNumElements(); + int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; + LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy); + + SmallVector ConcatOps; + SmallVector SubBuildVector; + + Register UndefReg; + if (WidenedDstTy != DstTy) + UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); + + // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as + // necessary. + // + // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 + // -> <2 x s16> + // + // %4:_(s16) = G_IMPLICIT_DEF + // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 + // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 + // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 + // %3:_(<3 x s16>) = G_EXTRACT %7, 0 + for (int I = 0; I != NumConcat; ++I) { + for (int J = 0; J != NarrowNumElts; ++J) { + int SrcIdx = NarrowNumElts * I + J; + + if (SrcIdx < DstNumElts) { + Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); + SubBuildVector.push_back(SrcReg); + } else + SubBuildVector.push_back(UndefReg); + } + + auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); + ConcatOps.push_back(BuildVec.getReg(0)); + SubBuildVector.clear(); + } + + if (DstTy == WidenedDstTy) + MIRBuilder.buildConcatVectors(DstReg, ConcatOps); + else { + auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); + MIRBuilder.buildExtract(DstReg, Concat, 0); + } + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -2960,6 +3019,8 @@ return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); + case G_BUILD_VECTOR: + return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -58,6 +58,14 @@ }; } +static LegalityPredicate isWideVec16(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + const LLT EltTy = Ty.getScalarType(); + return EltTy.getSizeInBits() == 16 && Ty.getNumElements() > 2; + }; +} + static LegalizeMutation oneMoreElement(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; @@ -931,7 +939,8 @@ .legalForCartesianProduct(AllS32Vectors, {S32}) .legalForCartesianProduct(AllS64Vectors, {S64}) .clampNumElements(0, V16S32, V32S32) - .clampNumElements(0, V2S64, V16S64); + .clampNumElements(0, V2S64, V16S64) + .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16)); if (ST.hasScalarPackInsts()) BuildVector.legalFor({V2S16, S32}); Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -623,25 +623,28 @@ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[AND1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[AND2]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -672,13 +675,13 @@ ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC3]](s16) ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -771,8 +774,10 @@ ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[ASHR6]], [[AND3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ashr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -799,8 +804,10 @@ ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16), [[ASHR2]](s16), [[ASHR3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[ASHR3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ashr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir @@ -822,3 +822,303 @@ %4:_(<4 x s128>) = G_BUILD_VECTOR %0, %1, %2, %3 S_NOP 0, implicit %4 ... + +--- +name: build_vector_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: build_vector_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 + S_NOP 0, implicit %4 +... + +--- +name: build_vector_v3s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: build_vector_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5 + S_NOP 0, implicit %6 +... + +--- +name: build_vector_v4s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: build_vector_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s16) = G_TRUNC %0 + %5:_(s16) = G_TRUNC %1 + %6:_(s16) = G_TRUNC %2 + %7:_(s16) = G_TRUNC %3 + %8:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %6, %7 + S_NOP 0, implicit %8 +... + +--- +name: build_vector_v5s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; CHECK-LABEL: name: build_vector_v5s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT]](<5 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s16) = G_TRUNC %0 + %6:_(s16) = G_TRUNC %1 + %7:_(s16) = G_TRUNC %2 + %8:_(s16) = G_TRUNC %3 + %9:_(s16) = G_TRUNC %4 + %10:_(<5 x s16>) = G_BUILD_VECTOR %5, %6, %7, %8, %9 + S_NOP 0, implicit %10 +... + +--- +name: build_vector_v7s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + + ; CHECK-LABEL: name: build_vector_v7s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<7 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<8 x s16>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT]](<7 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s16) = G_TRUNC %0 + %8:_(s16) = G_TRUNC %1 + %9:_(s16) = G_TRUNC %2 + %10:_(s16) = G_TRUNC %3 + %11:_(s16) = G_TRUNC %4 + %12:_(s16) = G_TRUNC %5 + %13:_(s16) = G_TRUNC %6 + %14:_(<7 x s16>) = G_BUILD_VECTOR %7, %8, %9, %10, %11, %12, %13 + S_NOP 0, implicit %14 +... + +--- +name: build_vector_v8s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; CHECK-LABEL: name: build_vector_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s16) = G_TRUNC %0 + %9:_(s16) = G_TRUNC %1 + %10:_(s16) = G_TRUNC %2 + %11:_(s16) = G_TRUNC %3 + %12:_(s16) = G_TRUNC %4 + %13:_(s16) = G_TRUNC %5 + %14:_(s16) = G_TRUNC %6 + %15:_(s16) = G_TRUNC %7 + %16:_(<8 x s16>) = G_BUILD_VECTOR %8, %9, %10, %11, %12, %13, %14, %15 + S_NOP 0, implicit %16 +... + +--- +name: build_vector_v16s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + + ; CHECK-LABEL: name: build_vector_v16s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) + ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) + ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16) + ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC14]](s16), [[TRUNC15]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s32) = COPY $vgpr8 + %9:_(s32) = COPY $vgpr9 + %10:_(s32) = COPY $vgpr10 + %11:_(s32) = COPY $vgpr11 + %12:_(s32) = COPY $vgpr12 + %13:_(s32) = COPY $vgpr13 + %14:_(s32) = COPY $vgpr14 + %15:_(s32) = COPY $vgpr15 + %16:_(s16) = G_TRUNC %0 + %17:_(s16) = G_TRUNC %1 + %18:_(s16) = G_TRUNC %2 + %19:_(s16) = G_TRUNC %3 + %20:_(s16) = G_TRUNC %4 + %21:_(s16) = G_TRUNC %5 + %22:_(s16) = G_TRUNC %6 + %23:_(s16) = G_TRUNC %7 + %24:_(s16) = G_TRUNC %8 + %25:_(s16) = G_TRUNC %9 + %26:_(s16) = G_TRUNC %10 + %27:_(s16) = G_TRUNC %11 + %28:_(s16) = G_TRUNC %12 + %29:_(s16) = G_TRUNC %13 + %30:_(s16) = G_TRUNC %14 + %31:_(s16) = G_TRUNC %15 + %32:_(<16 x s16>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28, %29, %30, %31 + S_NOP 0, implicit %32 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -514,15 +514,18 @@ ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[TRUNC1]](<3 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[DEF1]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 @@ -539,18 +542,22 @@ ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s1>) = G_TRUNC [[DEF]](<6 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s1>) = G_EXTRACT [[TRUNC]](<6 x s1>), 0 ; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1), [[UV3:%[0-9]+]]:_(s1), [[UV4:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[EXTRACT]](<5 x s1>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s1) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>) - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC2:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[TRUNC1]](<5 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s1) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s1) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s1) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s1) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s1) + ; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[ANYEXT3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT4]](s16), [[DEF1]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF2]](<6 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT1]](<5 x s16>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT5]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_EXTRACT %0, 4 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -358,12 +358,12 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fadd_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -393,12 +393,12 @@ ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]] ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC4]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fadd_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -479,8 +479,10 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fadd_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -507,8 +509,10 @@ ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC5]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC6]] ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fadd_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -235,12 +235,12 @@ ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fcanonicalize_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -258,12 +258,12 @@ ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -321,8 +321,10 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fcanonicalize_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -339,8 +341,10 @@ ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16), [[FCANONICALIZE2]](s16), [[FCANONICALIZE3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE2]](s16), [[FCANONICALIZE3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -344,12 +344,12 @@ ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fcos_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -374,12 +374,12 @@ ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fcos_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -401,12 +401,12 @@ ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FCOS %0 S_NOP 0, implicit %1 @@ -451,8 +451,10 @@ ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s32) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fcos_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -478,8 +480,10 @@ ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; VI: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16), [[INT5]](s16), [[INT7]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fcos_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -501,8 +505,10 @@ ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) ; GFX9: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16), [[INT2]](s16), [[INT3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FCOS %0 $vgpr0_vgpr1 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -257,12 +257,12 @@ ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_ffloor_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -280,12 +280,12 @@ ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_ffloor_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -303,12 +303,12 @@ ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FFLOOR %0 S_NOP 0, implicit %1 @@ -344,8 +344,10 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ffloor_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -362,8 +364,10 @@ ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] ; VI: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16), [[FFLOOR2]](s16), [[FFLOOR3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[FFLOOR3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ffloor_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -380,8 +384,10 @@ ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] ; GFX9: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16), [[FFLOOR2]](s16), [[FFLOOR3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[FFLOOR3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -436,12 +436,12 @@ ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC9]](<3 x s16>) + ; SI: [[DEF6:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF6]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) ; VI-LABEL: name: test_fma_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -483,12 +483,12 @@ ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC9]](<3 x s16>) + ; VI: [[DEF6:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA2]](s16), [[DEF6]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -592,8 +592,10 @@ ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fma_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -630,8 +632,10 @@ ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] ; VI: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16), [[FMA2]](s16), [[FMA3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA2]](s16), [[FMA3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir @@ -312,8 +312,10 @@ ; SI-F16DENORM: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI-F16DENORM: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] ; SI-F16DENORM: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16), [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; SI-F16DENORM: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; SI-F16DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; SI-F16DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI-F16DENORM: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; SI-F16FLUSH-LABEL: name: test_fmad_v4s16 ; SI-F16FLUSH: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -378,8 +380,10 @@ ; SI-F16FLUSH: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI-F16FLUSH: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] ; SI-F16FLUSH: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16), [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; SI-F16FLUSH: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; SI-F16FLUSH: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; SI-F16FLUSH: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI-F16FLUSH: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-F16DENORM-LABEL: name: test_fmad_v4s16 ; VI-F16DENORM: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI-F16DENORM: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -412,8 +416,10 @@ ; VI-F16DENORM: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; VI-F16DENORM: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI-F16DENORM: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16) - ; VI-F16DENORM: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16) + ; VI-F16DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %18(s16), %19(s16) + ; VI-F16DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI-F16DENORM: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-F16DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; VI-F16DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC11]] ; VI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] @@ -458,8 +464,10 @@ ; VI-F16FLUSH: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] ; VI-F16FLUSH: [[FMAD2:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] ; VI-F16FLUSH: [[FMAD3:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16), [[FMAD2]](s16), [[FMAD3]](s16) - ; VI-F16FLUSH: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) + ; VI-F16FLUSH: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD2]](s16), [[FMAD3]](s16) + ; VI-F16FLUSH: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI-F16FLUSH: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-LABEL: name: test_fmad_v4s16 ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -500,8 +508,10 @@ ; GFX10: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] ; GFX10: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; GFX10: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -420,13 +420,13 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -463,13 +463,13 @@ ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -561,8 +561,10 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -597,8 +599,10 @@ ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16), [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -420,13 +420,13 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -463,13 +463,13 @@ ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -561,8 +561,10 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -597,8 +599,10 @@ ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16), [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -357,12 +357,12 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fmul_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -392,12 +392,12 @@ ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]] ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC4]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fmul_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -478,8 +478,10 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fmul_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -506,8 +508,10 @@ ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16), [[FMUL2]](s16), [[FMUL3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[FMUL3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fmul_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -344,12 +344,12 @@ ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fsin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -374,12 +374,12 @@ ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fsin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -401,12 +401,12 @@ ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSIN %0 S_NOP 0, implicit %1 @@ -451,8 +451,10 @@ ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s32) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -478,8 +480,10 @@ ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; VI: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16), [[INT5]](s16), [[INT7]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -501,8 +505,10 @@ ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) ; GFX9: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16), [[INT2]](s16), [[INT3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSIN %0 $vgpr0_vgpr1 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -257,12 +257,12 @@ ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fsqrt_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -280,12 +280,12 @@ ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fsqrt_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -303,12 +303,12 @@ ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSQRT %0 S_NOP 0, implicit %1 @@ -344,8 +344,10 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsqrt_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -362,8 +364,10 @@ ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[FSQRT3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsqrt_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -380,8 +384,10 @@ ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[FSQRT3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -405,12 +405,12 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fsub_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -443,12 +443,12 @@ ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fsub_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -481,12 +481,12 @@ ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; GFX9: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FSUB %0, %1 @@ -541,8 +541,10 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsub_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -573,8 +575,10 @@ ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsub_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -605,8 +609,10 @@ ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] ; GFX9: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FSUB %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -4650,87 +4650,102 @@ ; CI-LABEL: name: test_load_constant_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 4) @@ -5098,8 +5113,10 @@ ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5116,8 +5133,10 @@ ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5134,8 +5153,10 @@ ; GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5152,8 +5173,10 @@ ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5170,8 +5193,10 @@ ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -5492,8 +5517,12 @@ ; CI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_constant_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -5526,8 +5555,12 @@ ; VI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-LABEL: name: test_load_constant_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -5560,8 +5593,12 @@ ; GFX9: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -5594,8 +5631,12 @@ ; CI-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -5628,8 +5669,12 @@ ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -4740,87 +4740,102 @@ ; CI-LABEL: name: test_load_flat_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 0) @@ -5188,8 +5203,10 @@ ; CI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5206,8 +5223,10 @@ ; VI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5224,8 +5243,10 @@ ; GFX9: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5242,8 +5263,10 @@ ; CI-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5260,8 +5283,10 @@ ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -5582,8 +5607,12 @@ ; CI: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_flat_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -5616,8 +5645,12 @@ ; VI: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-LABEL: name: test_load_flat_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -5650,8 +5683,12 @@ ; GFX9: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -5684,8 +5721,12 @@ ; CI-MESA: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -5718,8 +5759,12 @@ ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -4577,19 +4577,22 @@ ; SI-LABEL: name: test_load_global_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -4600,36 +4603,42 @@ ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -4640,19 +4649,22 @@ ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 1) @@ -4990,8 +5002,10 @@ ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) @@ -5012,8 +5026,10 @@ ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) @@ -5030,8 +5046,10 @@ ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) @@ -5052,8 +5070,10 @@ ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -5332,8 +5352,12 @@ ; SI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-HSA-LABEL: name: test_load_global_v8s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5366,8 +5390,12 @@ ; CI-HSA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; CI-HSA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; CI-HSA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-HSA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_global_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5400,8 +5428,12 @@ ; CI-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_global_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5434,8 +5466,12 @@ ; VI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5468,8 +5504,12 @@ ; GFX9-HSA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-HSA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; GFX9-HSA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-HSA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5502,8 +5542,12 @@ ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -5532,87 +5532,102 @@ ; SI-LABEL: name: test_load_local_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-DS128: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 3) @@ -5984,8 +5999,10 @@ ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6002,8 +6019,10 @@ ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6020,8 +6039,10 @@ ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6038,8 +6059,10 @@ ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6056,8 +6079,10 @@ ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4615,70 +4615,82 @@ ; SI-LABEL: name: test_load_private_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 5) @@ -4696,70 +4708,82 @@ ; SI-LABEL: name: test_load_private_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 5) @@ -4816,13 +4840,13 @@ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -4866,13 +4890,13 @@ ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; CI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -4910,13 +4934,13 @@ ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -4954,13 +4978,13 @@ ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 5) Index: test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -609,23 +609,26 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -656,13 +659,13 @@ ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR6]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -751,8 +754,10 @@ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[AND6]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_lshr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -779,8 +784,10 @@ ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16), [[LSHR6]](s16), [[LSHR7]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR6]](s16), [[LSHR7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -144,25 +144,28 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY7]] - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[TRUNC]](<3 x s16>), 0 + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) + ; CHECK: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[PHI]](<4 x s16>), 0 - ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[PHI]](<4 x s16>), 0 + ; CHECK: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -236,10 +239,12 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[COPY9]] ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[BUILD_VECTOR]](<4 x s16>), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](<4 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -708,8 +713,10 @@ ; CHECK: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] ; CHECK: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] ; CHECK: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32), [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32), [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) - ; CHECK: [[UV128:%[0-9]+]]:_(<16 x s32>), [[UV129:%[0-9]+]]:_(<16 x s32>), [[UV130:%[0-9]+]]:_(<16 x s32>), [[UV131:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s32>) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32), [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) + ; CHECK: [[UV128:%[0-9]+]]:_(<16 x s32>), [[UV129:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) + ; CHECK: [[UV130:%[0-9]+]]:_(<16 x s32>), [[UV131:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<32 x s32>) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[UV128]](<16 x s32>), %bb.1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -596,21 +596,24 @@ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[AND1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_shl_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -641,13 +644,13 @@ ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -732,8 +735,10 @@ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[AND3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_shl_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -760,8 +765,10 @@ ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16), [[SHL2]](s16), [[SHL3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL2]](s16), [[SHL3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_shl_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -341,8 +341,10 @@ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR9]](s32), [[ASHR10]](s32), [[ASHR11]](s32) ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR3]](<3 x s32>), 0 ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT5]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR4]](<4 x s16>) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -355,6 +355,7 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -362,6 +363,7 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[ASHR2]], [[ASHR3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -369,12 +371,13 @@ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[ASHR4]], [[ASHR5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMAX1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMAX2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -404,12 +407,12 @@ ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -484,8 +487,10 @@ ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[ASHR6]], [[ASHR7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -512,8 +517,10 @@ ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16), [[SMAX2]](s16), [[SMAX3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX2]](s16), [[SMAX3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -355,6 +355,7 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -362,6 +363,7 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[ASHR2]], [[ASHR3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -369,12 +371,13 @@ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[ASHR4]], [[ASHR5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMIN1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMIN2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -404,12 +407,12 @@ ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -484,8 +487,10 @@ ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[ASHR6]], [[ASHR7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -512,8 +517,10 @@ ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16), [[SMIN2]](s16), [[SMIN3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN2]](s16), [[SMIN3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -337,22 +337,25 @@ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMAX1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMAX2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_umax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -382,12 +385,12 @@ ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -455,8 +458,10 @@ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -483,8 +488,10 @@ ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16), [[UMAX2]](s16), [[UMAX3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX2]](s16), [[UMAX3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -337,22 +337,25 @@ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMIN1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMIN2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_umin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -382,12 +385,12 @@ ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -455,8 +458,10 @@ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -483,8 +488,10 @@ ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16), [[UMIN2]](s16), [[UMIN3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN2]](s16), [[UMIN3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3