Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h =================================================================== --- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -86,7 +86,7 @@ /// Legalize a vector instruction by increasing the number of vector elements /// involved and ignoring the added elements later. LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy); + LLT MoreTy); /// Expose MIRBuilder so clients can set their own RecordInsertInstruction /// functions @@ -119,6 +119,11 @@ // extending back with \p ExtOpcode. void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode); + /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a + /// Def by performing it with additional vector elements and extracting the + /// result elements, and replacing the vreg of the operand in place. + void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx); + LegalizeResult widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); LegalizeResult Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -99,6 +99,9 @@ case FewerElements: LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); + case MoreElements: + LLVM_DEBUG(dbgs() << ".. Increase number of elements\n"); + return moreElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized @@ -966,6 +969,15 @@ MO.setReg(DstTrunc); } +void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildExtract(MO.getReg(), DstExt, 0); + MO.setReg(DstExt); +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -2471,6 +2483,23 @@ return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy) { + MIRBuilder.setInstr(MI); + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case TargetOpcode::G_IMPLICIT_DEF: { + Observer.changingInstr(MI); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + default: + return UnableToLegalize; + } +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) { unsigned DstReg = MI.getOperand(0).getReg(); Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -38,6 +38,31 @@ }; } +static LegalityPredicate elementIsSmall(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + return Ty.isVector() && Ty.getElementType().getSizeInBits() < 32; + }; +} + +static LegalityPredicate isSmallOddVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + return Ty.isVector() && + Ty.getNumElements() % 2 != 0 && + Ty.getElementType().getSizeInBits() < 32; + }; +} + +static LegalizeMutation oneMoreElement(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + const LLT EltTy = Ty.getElementType(); + return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy)); + }; +} + + AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, const GCNTargetMachine &TM) { using namespace TargetOpcode; @@ -135,11 +160,10 @@ .legalFor({S32, S64, S16}); - getActionDefinitionsBuilder(G_IMPLICIT_DEF) .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr, ConstantPtr, LocalPtr, FlatPtr, PrivatePtr}) - .legalFor({LLT::vector(3, 16)})// FIXME: Hack + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .clampScalarOrElt(0, S32, S512) .legalIf(isMultiple32(0)); Index: test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -280,10 +280,12 @@ bb.0: ; CHECK-LABEL: name: test_and_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -623,8 +623,9 @@ ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT3]], [[EXTRACT5]](s16) - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF]], [[ASHR]](<2 x s16>), 0 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT6]], [[ASHR]](<2 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[ASHR1]](s16), 32 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s16>), 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -191,11 +191,12 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v3s8_offset16 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[DEF]](<3 x s32>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<3 x s16>) = G_ANYEXT [[TRUNC]](<3 x s8>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<3 x s16>), 32 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT]](s16) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<3 x s16>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<3 x s16>), 32 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT1]](s16) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8) ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF @@ -211,11 +212,12 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v5s1_offset4 - ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<5 x s1>) = G_TRUNC [[DEF]](<5 x s32>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<5 x s16>) = G_ANYEXT [[TRUNC]](<5 x s1>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<5 x s16>), 64 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[EXTRACT]](s16) + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s1>) = G_TRUNC [[DEF]](<6 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s1>) = G_EXTRACT [[TRUNC]](<6 x s1>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<5 x s16>) = G_ANYEXT [[EXTRACT]](<5 x s1>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<5 x s16>), 64 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[EXTRACT1]](s16) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s1) ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -205,8 +205,9 @@ bb.0: ; SI-LABEL: name: test_fabs_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) ; SI: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS]](s32) @@ -219,21 +220,24 @@ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; VI-LABEL: name: test_fabs_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[UV]] ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[UV1]] ; VI: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[UV2]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FABS]](s16), [[FABS1]](s16), [[FABS2]](s16) ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; GFX9-LABEL: name: test_fabs_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<3 x s16>), 0 - ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF1]], [[FABS]](<2 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<3 x s16>), 32 - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT1]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT2]] + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT1]], [[FABS]](<2 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT3]] ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FABS1]](s16), 32 ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -307,10 +307,12 @@ liveins: $vgpr0, $vgpr1 ; SI-LABEL: name: test_fadd_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] @@ -326,26 +328,31 @@ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; VI-LABEL: name: test_fadd_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV]], [[UV3]] ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[UV1]], [[UV4]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[UV2]], [[UV5]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16) ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; GFX9-LABEL: name: test_fadd_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF2:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF1]](<3 x s16>), 0 - ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[EXTRACT]], [[EXTRACT1]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF2]], [[FADD]](<2 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<3 x s16>), 32 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF1]](<3 x s16>), 32 - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[EXTRACT2]], [[EXTRACT3]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[EXTRACT3]], [[EXTRACT4]] + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[FADD]](<2 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 + ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[EXTRACT5]], [[EXTRACT6]] ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FADD1]](s16), 32 ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -364,12 +364,15 @@ bb.0: ; SI-LABEL: name: test_fma_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF2]](<3 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) + ; SI: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT2]](<3 x s16>) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) @@ -388,31 +391,38 @@ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; VI-LABEL: name: test_fma_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF2]](<3 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) + ; VI: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT2]](<3 x s16>) ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[UV]], [[UV3]], [[UV6]] ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[UV1]], [[UV4]], [[UV7]] ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[UV2]], [[UV5]], [[UV8]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16), [[FMA2]](s16) ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; GFX9-LABEL: name: test_fma_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF2:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF3:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF2]](<3 x s16>), 0 - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[EXTRACT]], [[EXTRACT1]], [[EXTRACT2]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF3]], [[FMA]](<2 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<3 x s16>), 32 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF1]](<3 x s16>), 32 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF2]](<3 x s16>), 32 - ; GFX9: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[EXTRACT3]], [[EXTRACT4]], [[EXTRACT5]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF3]](<4 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[EXTRACT4]], [[EXTRACT5]], [[EXTRACT6]] + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT3]], [[FMA]](<2 x s16>), 0 + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 + ; GFX9: [[EXTRACT9:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT2]](<3 x s16>), 32 + ; GFX9: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[EXTRACT7]], [[EXTRACT8]], [[EXTRACT9]] ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FMA1]](s16), 32 ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -306,10 +306,12 @@ liveins: $vgpr0, $vgpr1 ; SI-LABEL: name: test_fmul_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] @@ -325,26 +327,31 @@ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; VI-LABEL: name: test_fmul_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[UV]], [[UV3]] ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[UV1]], [[UV4]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[UV2]], [[UV5]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16), [[FMUL2]](s16) ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; GFX9-LABEL: name: test_fmul_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF2:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF1]](<3 x s16>), 0 - ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[EXTRACT]], [[EXTRACT1]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF2]], [[FMUL]](<2 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<3 x s16>), 32 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF1]](<3 x s16>), 32 - ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[EXTRACT2]], [[EXTRACT3]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[EXTRACT3]], [[EXTRACT4]] + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[FMUL]](<2 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[EXTRACT5]], [[EXTRACT6]] ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FMUL1]](s16), 32 ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -218,8 +218,9 @@ bb.0: ; SI-LABEL: name: test_fneg_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) @@ -232,21 +233,24 @@ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; VI-LABEL: name: test_fneg_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV]] ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV1]] ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[UV2]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FNEG]](s16), [[FNEG1]](s16), [[FNEG2]](s16) ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; GFX9-LABEL: name: test_fneg_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<3 x s16>), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF1]], [[FNEG]](<2 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<3 x s16>), 32 - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT1]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT2]] + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT1]], [[FNEG]](<2 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT3]] ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FNEG1]](s16), 32 ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir @@ -61,8 +61,9 @@ liveins: $vgpr0 ; CHECK-LABEL: name: test_fpext_v3f16_to_v3f32 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) ; CHECK: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -208,8 +208,9 @@ bb.0: ; SI-LABEL: name: test_fsqrt_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) @@ -222,16 +223,18 @@ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; VI-LABEL: name: test_fsqrt_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16) ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; GFX9-LABEL: name: test_fsqrt_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -329,10 +329,12 @@ liveins: $vgpr0, $vgpr1 ; SI-LABEL: name: test_fsub_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) @@ -357,10 +359,12 @@ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16), [[FPTRUNC5]](s16) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; VI-LABEL: name: test_fsub_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV3]] ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV]], [[FNEG]] ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV4]] @@ -370,10 +374,12 @@ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16) ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) ; GFX9-LABEL: name: test_fsub_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; GFX9: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; GFX9: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV3]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV]], [[FNEG]] ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV4]] Index: test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -198,6 +198,60 @@ $vgpr0_vgpr1 = COPY %1 ... +--- +name: test_implicit_def_v3s1 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v3s1 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s1>) = G_EXTRACT [[TRUNC]](<4 x s1>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s1>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s1) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s1) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s1) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s1>) = G_IMPLICIT_DEF + %1:_(<3 x s32>) = G_ANYEXT %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... + +--- +name: test_implicit_def_v2s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v2s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](<2 x s32>) + %0:_(<2 x s8>) = G_IMPLICIT_DEF + %1:_(<2 x s32>) = G_ANYEXT %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_implicit_def_v3s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v3s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s8>) = G_IMPLICIT_DEF + %1:_(<3 x s32>) = G_ANYEXT %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... + --- name: test_implicit_def_v2s16 body: | @@ -216,9 +270,10 @@ bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[DEF]](<3 x s16>), 0 + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<4 x s16>) = G_IMPLICIT_DEF @@ -238,6 +293,43 @@ $vgpr0_vgpr1 = COPY %0 ... +--- +name: test_implicit_def_v5s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v5s16 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF1]](<8 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + %0:_(<5 x s16>) = G_IMPLICIT_DEF + %1:_(<8 x s16>) = G_IMPLICIT_DEF + %2:_(<8 x s16>) = G_INSERT %1, %0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: test_implicit_def_v6s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v6s16 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF1]](<8 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC1]], [[TRUNC]](<6 x s16>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + %0:_(<6 x s16>) = G_IMPLICIT_DEF + %1:_(<8 x s16>) = G_IMPLICIT_DEF + %2:_(<8 x s16>) = G_INSERT %1, %0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + --- name: test_implicit_def_v8s16 body: | Index: test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -619,8 +619,9 @@ ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16) - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF]], [[LSHR]](<2 x s16>), 0 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT6]], [[LSHR]](<2 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[LSHR1]](s16), 32 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s16>), 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -208,10 +208,12 @@ bb.0: ; CHECK-LABEL: name: test_or_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] Index: test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -603,8 +603,9 @@ ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT3]], [[EXTRACT5]](s16) - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[DEF]], [[SHL]](<2 x s16>), 0 + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT6]], [[SHL]](<2 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[SHL1]](s16), 32 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s16>), 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -57,8 +57,9 @@ body: | bb.0: ; CHECK-LABEL: name: test_unmerge_s16_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) Index: test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -208,10 +208,12 @@ bb.0: ; CHECK-LABEL: name: test_xor_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) + ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]]