Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2562,6 +2562,9 @@ return Legalized; } +// Handles operands with different types, but all must have the same number of +// elements. There will be multiple type indexes. NarrowTy is expected to have +// the result element type. LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -2579,15 +2582,22 @@ SmallVector ExtractedRegs[3]; SmallVector Parts; + unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + // Break down all the sources into NarrowTy pieces we can operate on. This may // involve creating merges to a wider type, paddedd with undef. for (int I = 0; I != NumOps; ++I) { Register SrcReg = MI.getOperand(I + 1).getReg(); LLT SrcTy = MRI.getType(SrcReg); - GCDTys[I] = extractGCDType(ExtractedRegs[I], SrcTy, NarrowTy, SrcReg); + + // Each operand may have its own type, but only the number of elements + // matters. + LLT OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType()); + + GCDTys[I] = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. - LCMTys[I] = buildLCMMergePieces(SrcTy, NarrowTy, GCDTys[I], + LCMTys[I] = buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTys[I], ExtractedRegs[I], TargetOpcode::G_ANYEXT); } @@ -2597,7 +2607,10 @@ SmallVector InputRegs(NumOps, Register()); int NumParts = ExtractedRegs[0].size(); - const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + const LLT DstTy = MRI.getType(DstReg); + const unsigned DstSize = DstTy.getSizeInBits(); + LLT DstLCMTy = getLCMType(DstTy, NarrowTy); + const unsigned NarrowSize = NarrowTy.getSizeInBits(); // We widened the source registers to satisfy merge/unmerge size @@ -2620,7 +2633,7 @@ ResultRegs.append(NumUndefParts, MIRBuilder.buildUndef(NarrowTy).getReg(0)); // Extract the possibly padded result to the original result register. - buildWidenedRemergeToDst(DstReg, LCMTys[0], ResultRegs); + buildWidenedRemergeToDst(DstReg, DstLCMTy, ResultRegs); MI.eraseFromParent(); return Legalized; @@ -3193,6 +3206,7 @@ switch (MI.getOpcode()) { case G_IMPLICIT_DEF: return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); + case G_TRUNC: case G_AND: case G_OR: case G_XOR: Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -476,6 +476,7 @@ .lower(); getActionDefinitionsBuilder(G_TRUNC) + .clampMaxNumElements(0, S16, 2) .alwaysLegal(); getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -401,9 +401,10 @@ ; CHECK-LABEL: name: test_unmerge_values_s8_of_trunc_v4s16_concat_vectors_v2s32_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<2 x s16>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<2 x s16>) ; CHECK: S_ENDPGM 0, implicit [[UV]](s8), implicit [[UV1]](s8), implicit [[UV2]](s8), implicit [[UV3]](s8), implicit [[UV4]](s8), implicit [[UV5]](s8), implicit [[UV6]](s8), implicit [[UV7]](s8) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -449,9 +450,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>), [[COPY2]](<2 x s32>), [[COPY3]](<2 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](<8 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY2]](<2 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY3]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s16>) ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -472,9 +476,12 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -403,31 +403,48 @@ ; CHECK-LABEL: name: test_and_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC1]](<6 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF1]](<6 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC2:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF2]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF2]](<6 x s32>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV8]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC6]](<2 x s16>), [[TRUNC7]](<2 x s16>), [[TRUNC8]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS2]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS2]], [[EXTRACT1]](<5 x s16>), 0 + ; CHECK: [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK: [[UV11:%[0-9]+]]:_(<3 x s16>), [[UV12:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0 + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV9]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV11]](<3 x s16>), 0 ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]] ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND]](<4 x s16>), 0 - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV1]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0 + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV10]](<3 x s16>), 0 + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]] ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND1]](<4 x s16>), 0 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC3:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF4]](<8 x s32>) - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC3]], [[EXTRACT4]](<5 x s16>), 0 + ; CHECK: [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>), [[UV16:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF4]](<8 x s32>) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV13]](<2 x s32>) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV14]](<2 x s32>) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV15]](<2 x s32>) + ; CHECK: [[TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV16]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC9]](<2 x s16>), [[TRUNC10]](<2 x s16>), [[TRUNC11]](<2 x s16>), [[TRUNC12]](<2 x s16>) + ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[CONCAT_VECTORS4]], [[EXTRACT4]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -415,14 +415,22 @@ ; CHECK-LABEL: name: test_bitcast_v3s16_to_s48 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[COPY]](<3 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -446,8 +446,11 @@ bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[TRUNC]](<4 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF @@ -462,8 +465,11 @@ bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[TRUNC]](<4 x s16>), 16 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 16 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF @@ -478,8 +484,11 @@ bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[TRUNC]](<4 x s16>), 32 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 32 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF @@ -494,8 +503,11 @@ bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset24 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[TRUNC]](<4 x s16>), 48 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 48 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF @@ -573,8 +585,12 @@ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT1]](<5 x s16>), 0 + ; CHECK: [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF1]](<6 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS1]], [[EXTRACT1]](<5 x s16>), 0 ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT5]](s32) @@ -603,8 +619,12 @@ bb.0: ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 32 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 32 ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 32 @@ -940,11 +960,19 @@ ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF1]](<6 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS1]], [[EXTRACT]](<5 x s16>), 0 ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0 ; CHECK: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -410,11 +410,20 @@ ; CHECK-LABEL: name: test_implicit_def_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF1]](<8 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>), [[TRUNC6]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[CONCAT_VECTORS1]], [[EXTRACT]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF @@ -429,10 +438,19 @@ ; CHECK-LABEL: name: test_implicit_def_v6s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF1]](<8 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC1]], [[TRUNC]](<6 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>), [[TRUNC6]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[CONCAT_VECTORS1]], [[CONCAT_VECTORS]](<6 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF @@ -447,8 +465,13 @@ ; CHECK-LABEL: name: test_implicit_def_v8s16 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF]](<8 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<8 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<8 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<8 x s16>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -403,31 +403,48 @@ ; CHECK-LABEL: name: test_or_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC1]](<6 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF1]](<6 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC2:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF2]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF2]](<6 x s32>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV8]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC6]](<2 x s16>), [[TRUNC7]](<2 x s16>), [[TRUNC8]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS2]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS2]], [[EXTRACT1]](<5 x s16>), 0 + ; CHECK: [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK: [[UV11:%[0-9]+]]:_(<3 x s16>), [[UV12:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0 + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV9]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV11]](<3 x s16>), 0 ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]] ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR]](<4 x s16>), 0 - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV1]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0 + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV10]](<3 x s16>), 0 + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 ; CHECK: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR1]](<4 x s16>), 0 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC3:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF4]](<8 x s32>) - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC3]], [[EXTRACT4]](<5 x s16>), 0 + ; CHECK: [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>), [[UV16:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF4]](<8 x s32>) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV13]](<2 x s32>) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV14]](<2 x s32>) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV15]](<2 x s32>) + ; CHECK: [[TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV16]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC9]](<2 x s16>), [[TRUNC10]](<2 x s16>), [[TRUNC11]](<2 x s16>), [[TRUNC12]](<2 x s16>) + ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[CONCAT_VECTORS4]], [[EXTRACT4]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -979,54 +979,60 @@ ; GFX9-LABEL: name: test_sext_inreg_v6s16_1 ; GFX9: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; GFX9: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<6 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[C]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV4]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV2]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV5]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>) - ; GFX9: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<6 x s16>) ; GFX8-LABEL: name: test_sext_inreg_v6s16_1 ; GFX8: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; GFX8: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; GFX8: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>) + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>) + ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC2]](<2 x s16>) + ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16) ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16) ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16) + ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC6]], [[C1]](s16) ; GFX8: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C1]](s16) - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16) + ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC7]], [[C1]](s16) ; GFX8: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL4]], [[C1]](s16) - ; GFX8: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC6]], [[C1]](s16) + ; GFX8: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[C1]](s16) ; GFX8: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[SHL5]], [[C1]](s16) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) @@ -1047,14 +1053,16 @@ ; GFX8: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; GFX6-LABEL: name: test_sext_inreg_v6s16_1 ; GFX6: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; GFX6: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>) ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>) ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC2]](<2 x s16>) ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -5826,9 +5826,15 @@ ; SI-LABEL: name: test_store_global_v10s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[DEF:%[0-9]+]]:_(<10 x s32>) = G_IMPLICIT_DEF - ; SI: [[TRUNC:%[0-9]+]]:_(<10 x s16>) = G_TRUNC [[DEF]](<10 x s32>) - ; SI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 0 - ; SI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 128 + ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<10 x s32>) + ; SI: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; SI: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; SI: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; SI: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; SI: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 0 + ; SI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 128 ; SI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -5836,9 +5842,15 @@ ; CI-LABEL: name: test_store_global_v10s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[DEF:%[0-9]+]]:_(<10 x s32>) = G_IMPLICIT_DEF - ; CI: [[TRUNC:%[0-9]+]]:_(<10 x s16>) = G_TRUNC [[DEF]](<10 x s32>) - ; CI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 0 - ; CI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 128 + ; CI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<10 x s32>) + ; CI: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CI: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CI: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CI: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CI: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 0 + ; CI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 128 ; CI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -5846,9 +5858,15 @@ ; VI-LABEL: name: test_store_global_v10s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<10 x s32>) = G_IMPLICIT_DEF - ; VI: [[TRUNC:%[0-9]+]]:_(<10 x s16>) = G_TRUNC [[DEF]](<10 x s32>) - ; VI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 128 + ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<10 x s32>) + ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; VI: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; VI: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; VI: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; VI: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 0 + ; VI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 128 ; VI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -5856,9 +5874,15 @@ ; GFX9-LABEL: name: test_store_global_v10s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[DEF:%[0-9]+]]:_(<10 x s32>) = G_IMPLICIT_DEF - ; GFX9: [[TRUNC:%[0-9]+]]:_(<10 x s16>) = G_TRUNC [[DEF]](<10 x s32>) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[TRUNC]](<10 x s16>), 128 + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<10 x s32>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 0 + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 128 ; GFX9: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -5924,9 +5948,16 @@ ; SI-LABEL: name: test_store_global_v12s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[DEF:%[0-9]+]]:_(<12 x s32>) = G_IMPLICIT_DEF - ; SI: [[TRUNC:%[0-9]+]]:_(<12 x s16>) = G_TRUNC [[DEF]](<12 x s32>) - ; SI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 0 - ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 128 + ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<12 x s32>) + ; SI: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; SI: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; SI: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; SI: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; SI: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; SI: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 0 + ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 128 ; SI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -5934,9 +5965,16 @@ ; CI-LABEL: name: test_store_global_v12s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[DEF:%[0-9]+]]:_(<12 x s32>) = G_IMPLICIT_DEF - ; CI: [[TRUNC:%[0-9]+]]:_(<12 x s16>) = G_TRUNC [[DEF]](<12 x s32>) - ; CI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 0 - ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 128 + ; CI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<12 x s32>) + ; CI: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CI: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CI: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CI: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CI: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CI: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 0 + ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 128 ; CI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -5944,9 +5982,16 @@ ; VI-LABEL: name: test_store_global_v12s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<12 x s32>) = G_IMPLICIT_DEF - ; VI: [[TRUNC:%[0-9]+]]:_(<12 x s16>) = G_TRUNC [[DEF]](<12 x s32>) - ; VI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 128 + ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<12 x s32>) + ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; VI: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; VI: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; VI: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; VI: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; VI: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 0 + ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 128 ; VI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -5954,9 +5999,16 @@ ; GFX9-LABEL: name: test_store_global_v12s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[DEF:%[0-9]+]]:_(<12 x s32>) = G_IMPLICIT_DEF - ; GFX9: [[TRUNC:%[0-9]+]]:_(<12 x s16>) = G_TRUNC [[DEF]](<12 x s32>) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[TRUNC]](<12 x s16>), 128 + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<12 x s32>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 0 + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<12 x s16>), 128 ; GFX9: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -46,6 +46,41 @@ $vgpr0 = COPY %1 ... +--- +name: test_trunc_v3s32_to_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_trunc_v3s32_to_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 + S_ENDPGM 0, implicit %2, implicit %3, implicit %4 +... + --- name: test_trunc_v4s32_to_v4s16 body: | @@ -54,13 +89,69 @@ ; CHECK-LABEL: name: test_trunc_v4s32_to_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>) - ; CHECK: $vgpr0_vgpr1 = COPY [[TRUNC]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s16>) = G_TRUNC %0 $vgpr0_vgpr1 = COPY %1 ... +--- +name: test_trunc_v8s32_to_v8s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_trunc_v8s32_to_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s16>) = G_TRUNC %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_trunc_v2s64_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_trunc_v2s64_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s64>) + ; CHECK: $vgpr0 = COPY [[TRUNC]](<2 x s16>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s16>) = G_TRUNC %0 + $vgpr0 = COPY %1 +... + +--- +name: test_trunc_v4s64_to_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_trunc_v4s64_to_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s64>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s64>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<4 x s16>) = G_TRUNC %0 + $vgpr0_vgpr1 = COPY %1 +... + --- name: test_trunc_s64_to_s1 body: | Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -403,31 +403,48 @@ ; CHECK-LABEL: name: test_xor_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC1]](<6 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF1]](<6 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC2:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF2]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF2]](<6 x s32>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV8]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC6]](<2 x s16>), [[TRUNC7]](<2 x s16>), [[TRUNC8]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS2]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS2]], [[EXTRACT1]](<5 x s16>), 0 + ; CHECK: [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK: [[UV11:%[0-9]+]]:_(<3 x s16>), [[UV12:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0 + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV9]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV11]](<3 x s16>), 0 ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]] ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[XOR]](<4 x s16>), 0 - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV1]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0 + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV10]](<3 x s16>), 0 + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 ; CHECK: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]] ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[XOR1]](<4 x s16>), 0 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC3:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF4]](<8 x s32>) - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC3]], [[EXTRACT4]](<5 x s16>), 0 + ; CHECK: [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>), [[UV16:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF4]](<8 x s32>) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV13]](<2 x s32>) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV14]](<2 x s32>) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV15]](<2 x s32>) + ; CHECK: [[TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV16]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC9]](<2 x s16>), [[TRUNC10]](<2 x s16>), [[TRUNC11]](<2 x s16>), [[TRUNC12]](<2 x s16>) + ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[CONCAT_VECTORS4]], [[EXTRACT4]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll @@ -117,3 +117,71 @@ %cast = bitcast <2 x i16> %trunc to i32 ret i32 %cast } + +; ; FIXME: G_INSERT mishandled +; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) { +; %trunc = trunc <3 x i32> %src to <3 x i16> +; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> +; %cast = bitcast <4 x i16> %ext to <2 x i32> +; ret <2 x i32> %cast +; } + +; ; FIXME: G_INSERT mishandled +; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) { +; %trunc = trunc <3 x i32> %src to <3 x i16> +; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> +; %cast = bitcast <4 x i16> %ext to <2 x i32> +; ret <2 x i32> %cast +; } + +define <2 x i32> @v_trunc_v4i32_to_v4i16(<4 x i32> %src) { +; GFX7-LABEL: v_trunc_v4i32_to_v4i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, v0, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_trunc_v4i32_to_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_sdwa v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX8-NEXT: v_mov_b32_sdwa v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX8-NEXT: v_mov_b32_e32 v1, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %trunc = trunc <4 x i32> %src to <4 x i16> + %cast = bitcast <4 x i16> %trunc to <2 x i32> + ret <2 x i32> %cast +} + +define amdgpu_ps <2 x i32> @s_trunc_v4i32_to_v4i16(<4 x i32> inreg %src) { +; GFX7-LABEL: s_trunc_v4i32_to_v4i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_and_b32 s0, s0, s4 +; GFX7-NEXT: s_or_b32 s0, s1, s0 +; GFX7-NEXT: s_lshl_b32 s1, s3, 16 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: s_or_b32 s1, s1, s2 +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_trunc_v4i32_to_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_and_b32 s0, s0, s4 +; GFX8-NEXT: s_or_b32 s0, s1, s0 +; GFX8-NEXT: s_lshl_b32 s1, s3, 16 +; GFX8-NEXT: s_and_b32 s2, s2, s4 +; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: ; return to shader part epilog + %trunc = trunc <4 x i32> %src to <4 x i16> + %cast = bitcast <4 x i16> %trunc to <2 x i32> + ret <2 x i32> %cast +}