Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h =================================================================== --- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -225,6 +225,7 @@ LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI); + LegalizeResult lowerUnmergeValues(MachineInstr &MI); private: MachineRegisterInfo &MRI; Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1936,6 +1936,8 @@ case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); + case G_UNMERGE_VALUES: + return lowerUnmergeValues(MI); } } @@ -3626,3 +3628,36 @@ MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { + const unsigned NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst0Reg); + + + // Expand scalarizing unmerge as bitcast to integer and shift. + if (!DstTy.isVector() && SrcTy.isVector() && + SrcTy.getElementType() == DstTy) { + LLT IntTy = LLT::scalar(SrcTy.getSizeInBits()); + Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0); + + MIRBuilder.buildTrunc(Dst0Reg, Cast); + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned Offset = DstSize; + for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { + auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); + auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt); + MIRBuilder.buildTrunc(MI.getOperand(I), Shift); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -943,6 +943,7 @@ .maxScalar(LitTyIdx, S256) .maxScalar(BigTyIdx, S512) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) + .lowerFor({{S16, V2S16}}) .widenScalarIf( [=](const LegalityQuery &Query) { const LLT &Ty = Query.Types[BigTyIdx]; Index: test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -266,9 +266,16 @@ ; CHECK-LABEL: name: test_unmerge_values_s16_of_concat_vectors_v2s16_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[UV]](s16), implicit [[UV1]](s16), implicit [[UV2]](s16), implicit [[UV3]](s16) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -119,34 +119,51 @@ ; GFX6-LABEL: name: test_add_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX6: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: test_add_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[UV]], [[UV2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[UV1]], [[UV3]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ADD]](s16), [[ADD1]](s16) ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_add_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[UV]], [[UV2]] - ; GFX9: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[UV1]], [[UV3]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ADD]](s16), [[ADD1]](s16) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -82,10 +82,12 @@ ; CHECK-LABEL: name: test_anyext_v2s16_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = G_ANYEXT %0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -490,25 +490,42 @@ ; SI-LABEL: name: test_ashr_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) - ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT1]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) + ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[AND1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_ashr_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[UV]], [[UV2]](s16) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[UV1]], [[UV3]](s16) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC2]](s16) + ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC3]](s16) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_ashr_v2s16_v2s16 @@ -531,32 +548,46 @@ ; SI-LABEL: name: test_ashr_v2s16_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) - ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[UV2]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT1]], [[UV3]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[UV]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[UV1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_ashr_v2s16_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[UV]], [[UV2]](s32) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[UV1]], [[UV3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[UV]](s32) + ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[UV1]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_ashr_v2s16_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[UV]], [[UV2]](s32) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[UV1]], [[UV3]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[UV]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[UV1]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -665,42 +696,73 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s16) - ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) - ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV9]](s16) - ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT1]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV10]](s16) - ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16) - ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT2]], [[ZEXT2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR2]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV11]](s16) - ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16) - ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT3]], [[ZEXT3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) + ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[AND1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) + ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[AND2]](s32) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) + ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) + ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[ASHR6]], [[AND3]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR7]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; VI-LABEL: name: test_ashr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[UV2]], [[UV8]](s16) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[UV3]], [[UV9]](s16) - ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[UV4]], [[UV10]](s16) - ; VI: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[UV5]], [[UV11]](s16) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC4]](s16) + ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC5]](s16) + ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC6]](s16) + ; VI: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC3]], [[TRUNC7]](s16) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16), [[ASHR2]](s16), [[ASHR3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_ashr_v4s16_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -87,17 +87,18 @@ liveins: $vgpr0 ; CHECK-LABEL: name: bswap_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[COPY1]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT1]] - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[COPY2]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[COPY3]] + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -135,18 +135,22 @@ liveins: $vgpr0 ; CHECK-LABEL: name: ctlz_zero_undef_v2s16_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[ZEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[ZEXT1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -135,18 +135,22 @@ liveins: $vgpr0 ; CHECK-LABEL: name: ctlz_v2s16_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[ZEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C]] - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[ZEXT1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32) ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -133,15 +133,20 @@ liveins: $vgpr0 ; CHECK-LABEL: name: ctpop_v2s16_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[ZEXT1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -133,15 +133,20 @@ liveins: $vgpr0 ; CHECK-LABEL: name: cttz_zero_undef_v2s16_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[ZEXT1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -135,18 +135,23 @@ liveins: $vgpr0 ; CHECK-LABEL: name: cttz_v2s16_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[C]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]] ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[C]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]] ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[OR1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -189,20 +189,28 @@ ; SI-LABEL: name: test_fabs_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_fabs_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[UV]] - ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[UV1]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] + ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[TRUNC1]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FABS]](s16), [[FABS1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fabs_v2s16 @@ -287,18 +295,25 @@ ; SI-LABEL: name: test_fabs_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FABS3:%[0-9]+]]:_(s32) = G_FABS [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) @@ -306,12 +321,19 @@ ; VI-LABEL: name: test_fabs_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[UV2]] - ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[UV3]] - ; VI: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[UV4]] - ; VI: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[UV5]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] + ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[TRUNC1]] + ; VI: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[TRUNC2]] + ; VI: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FABS]](s16), [[FABS1]](s16), [[FABS2]](s16), [[FABS3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fabs_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -268,14 +268,21 @@ ; SI-LABEL: name: test_fadd_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) @@ -283,10 +290,17 @@ ; VI-LABEL: name: test_fadd_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV]], [[UV2]] - ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[UV1]], [[UV3]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC2]] + ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fadd_v2s16 @@ -393,25 +407,38 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) + ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) @@ -420,15 +447,28 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV2]], [[UV8]] - ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[UV3]], [[UV9]] - ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[UV4]], [[UV10]] - ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[UV5]], [[UV11]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC4]] + ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC5]] + ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC6]] + ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[TRUNC7]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fadd_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -174,20 +174,28 @@ ; SI-LABEL: name: test_fcanonicalize_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_fcanonicalize_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV1]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v2s16 @@ -272,18 +280,25 @@ ; SI-LABEL: name: test_fcanonicalize_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) @@ -291,12 +306,19 @@ ; VI-LABEL: name: test_fcanonicalize_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV2]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV3]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV4]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV5]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16), [[FCANONICALIZE2]](s16), [[FCANONICALIZE3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir @@ -325,18 +325,25 @@ ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX7: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX7: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX7: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX7: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; GFX7: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX7: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; GFX7: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]] - ; GFX7: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; GFX7: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; GFX7: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX7: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; GFX7: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]] - ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX7: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX7: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV4]], [[UV6]] - ; GFX7: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV5]], [[UV7]] + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; GFX7: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV]], [[UV2]] + ; GFX7: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_icmp_v2s16 @@ -344,14 +351,21 @@ ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX8: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s16), [[UV2]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s16), [[UV3]] - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV4]], [[UV6]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV5]], [[UV7]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC]](s16), [[TRUNC2]] + ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC1]](s16), [[TRUNC3]] + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV]], [[UV2]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_icmp_v2s16 @@ -359,14 +373,21 @@ ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s16), [[UV2]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s16), [[UV3]] - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV4]], [[UV6]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV5]], [[UV7]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC]](s16), [[TRUNC2]] + ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[TRUNC1]](s16), [[TRUNC3]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[UV]], [[UV2]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -319,17 +319,27 @@ ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) @@ -338,11 +348,21 @@ ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<2 x s16>) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC2]], [[TRUNC4]] + ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fma_v2s16 @@ -469,32 +489,51 @@ ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI: [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV12]](<2 x s16>) - ; SI: [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV13]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV14]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV15]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) - ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV16]](s16) + ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; SI: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) - ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV17]](s16) + ; SI: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) @@ -504,18 +543,37 @@ ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI: [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV12]](<2 x s16>) - ; VI: [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV13]](<2 x s16>) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[UV2]], [[UV8]], [[UV14]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[UV3]], [[UV9]], [[UV15]] - ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[UV4]], [[UV10]], [[UV16]] - ; VI: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[UV5]], [[UV11]], [[UV17]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] + ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] + ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] + ; VI: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16), [[FMA2]](s16), [[FMA3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fma_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -324,13 +324,20 @@ ; SI-LABEL: name: test_fminnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC %14(s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC %11(s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) @@ -339,15 +346,22 @@ ; VI-LABEL: name: test_fminnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %7(s16), %8(s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV1]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV3]] + ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV2]] + ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -475,22 +489,35 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC %26(s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC %23(s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC %20(s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) + ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC %17(s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) @@ -502,24 +529,37 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %11(s16), %12(s16), %13(s16), %14(s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV5]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV11]] + ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV4]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV10]] + ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV3]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV9]] + ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV2]] - ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV8]] + ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -324,13 +324,20 @@ ; SI-LABEL: name: test_fminnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC %14(s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC %11(s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) @@ -339,15 +346,22 @@ ; VI-LABEL: name: test_fminnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %7(s16), %8(s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV1]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV3]] + ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV2]] + ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -475,22 +489,35 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC %26(s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC %23(s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC %20(s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) + ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC %17(s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) @@ -502,24 +529,37 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %11(s16), %12(s16), %13(s16), %14(s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV5]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV11]] + ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV4]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV10]] + ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV3]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV9]] + ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV2]] - ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[UV8]] + ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -267,14 +267,21 @@ ; SI-LABEL: name: test_fmul_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) @@ -282,10 +289,17 @@ ; VI-LABEL: name: test_fmul_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[UV]], [[UV2]] - ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[UV1]], [[UV3]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] + ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fmul_v2s16 @@ -392,25 +406,38 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) + ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) @@ -419,15 +446,28 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[UV2]], [[UV8]] - ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[UV3]], [[UV9]] - ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[UV4]], [[UV10]] - ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[UV5]], [[UV11]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] + ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] + ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] + ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16), [[FMUL2]](s16), [[FMUL3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fmul_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -187,20 +187,28 @@ ; SI-LABEL: name: test_fneg_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_fneg_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV1]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FNEG]](s16), [[FNEG1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fneg_v2s16 @@ -285,18 +293,25 @@ ; SI-LABEL: name: test_fneg_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) @@ -304,12 +319,19 @@ ; VI-LABEL: name: test_fneg_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV2]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV3]] - ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[UV4]] - ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[UV5]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] + ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC2]] + ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FNEG]](s16), [[FNEG1]](s16), [[FNEG2]](s16), [[FNEG3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fneg_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir @@ -26,9 +26,13 @@ ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: %4:_(s32) = nnan G_FPEXT [[UV]](s16) - ; CHECK: %5:_(s32) = nnan G_FPEXT [[UV1]](s16) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: %4:_(s32) = nnan G_FPEXT [[TRUNC]](s16) + ; CHECK: %5:_(s32) = nnan G_FPEXT [[TRUNC1]](s16) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -44,9 +48,13 @@ ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32_w_flags ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: %4:_(s32) = nnan G_FPEXT [[UV]](s16) - ; CHECK: %5:_(s32) = nnan G_FPEXT [[UV1]](s16) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: %4:_(s32) = nnan G_FPEXT [[TRUNC]](s16) + ; CHECK: %5:_(s32) = nnan G_FPEXT [[TRUNC1]](s16) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -83,12 +91,19 @@ ; CHECK-LABEL: name: test_fpext_v4f16_to_v4f32 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; CHECK: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; CHECK: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; CHECK: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; CHECK: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32), [[FPEXT3]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -189,27 +189,39 @@ ; SI-LABEL: name: test_fsqrt_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_fsqrt_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fsqrt_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -281,18 +293,25 @@ ; SI-LABEL: name: test_fsqrt_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) @@ -300,23 +319,37 @@ ; VI-LABEL: name: test_fsqrt_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV3]] - ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV4]] - ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[UV5]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fsqrt_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV3]] - ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV4]] - ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[UV5]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -311,19 +311,26 @@ ; SI-LABEL: name: test_fsub_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC1]](s16) ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) @@ -332,12 +339,19 @@ ; VI-LABEL: name: test_fsub_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV1]] - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV2]], [[FNEG]] - ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[UV3]], [[FNEG1]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG]] + ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG1]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fsub_v2s16 @@ -446,36 +460,49 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16) + ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT6]] ; SI: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG2]](s32) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) + ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT7]], [[FPEXT8]] ; SI: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16) + ; SI: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT9]] ; SI: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG3]](s32) - ; SI: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) + ; SI: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] ; SI: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) @@ -485,38 +512,64 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV8]] - ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV2]], [[FNEG]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV9]] - ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[UV3]], [[FNEG1]] - ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[UV10]] - ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[UV4]], [[FNEG2]] - ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[UV11]] - ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[UV5]], [[FNEG3]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]] + ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] + ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC6]] + ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] + ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_fsub_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[UV8]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[UV2]], [[FNEG]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[UV9]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[UV3]], [[FNEG1]] - ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[UV10]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[UV4]], [[FNEG2]] - ; GFX9: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[UV11]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[UV5]], [[FNEG3]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]] + ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC6]] + ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX9: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] + ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -741,18 +741,26 @@ ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX7: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX7: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX7: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; GFX7: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ZEXT]](s32), [[ZEXT1]] - ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; GFX7: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ZEXT2]](s32), [[ZEXT3]] - ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX7: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX7: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; GFX7: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]] + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX7: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s32), [[AND1]] + ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX7: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX7: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND2]](s32), [[AND3]] + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; GFX7: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] + ; GFX7: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_icmp_v2s16 @@ -760,14 +768,21 @@ ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX8: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s16), [[UV2]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s16), [[UV3]] - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[TRUNC2]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[TRUNC3]] + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_icmp_v2s16 @@ -775,14 +790,21 @@ ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s16), [[UV2]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s16), [[UV3]] - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[TRUNC2]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[TRUNC3]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -478,26 +478,41 @@ ; SI-LABEL: name: test_lshr_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[ZEXT2]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_lshr_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UV]], [[UV2]](s16) - ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[UV1]], [[UV3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR]](s16), [[LSHR1]](s16) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC2]](s16) + ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC3]](s16) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR2]](s16), [[LSHR3]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_lshr_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -519,33 +534,46 @@ ; SI-LABEL: name: test_lshr_v2s16_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[UV2]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[UV3]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[UV]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[UV1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_lshr_v2s16_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UV]], [[UV2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[UV1]], [[UV3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR]](s16), [[LSHR1]](s16) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[UV]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[UV1]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[LSHR2]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_lshr_v2s16_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UV]], [[UV2]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[UV1]], [[UV3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR]](s16), [[LSHR1]](s16) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[UV]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[UV1]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[LSHR2]](s16) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -653,43 +681,70 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV9]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[ZEXT2]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV10]](s16) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[ZEXT4]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UV11]](s16) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[ZEXT6]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[AND6]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; VI-LABEL: name: test_lshr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UV2]], [[UV8]](s16) - ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[UV3]], [[UV9]](s16) - ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[UV4]], [[UV10]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UV5]], [[UV11]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[LSHR]](s16), [[LSHR1]](s16), [[LSHR2]](s16), [[LSHR3]](s16) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC4]](s16) + ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC5]](s16) + ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC6]](s16) + ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[TRUNC7]](s16) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16), [[LSHR6]](s16), [[LSHR7]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -257,34 +257,51 @@ ; GFX6-LABEL: name: test_mul_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[ANYEXT]], [[ANYEXT1]] + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]] ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX6: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[ANYEXT2]], [[ANYEXT3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY4]], [[COPY5]] ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: test_mul_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[UV]], [[UV2]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[UV1]], [[UV3]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC1]], [[TRUNC3]] ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[MUL]](s16), [[MUL1]](s16) ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_mul_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[UV]], [[UV2]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[UV1]], [[UV3]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC1]], [[TRUNC3]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[MUL]](s16), [[MUL1]](s16) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -64,15 +64,18 @@ ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: G_BR %bb.2 @@ -194,26 +197,31 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; CHECK: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY7]] ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[COPY9]] ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK: G_BR %bb.2 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -83,10 +83,16 @@ ; CHECK-LABEL: name: test_sext_v2s16_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) - ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = G_SEXT %0 @@ -123,13 +129,24 @@ ; CHECK-LABEL: name: test_sext_v4s16_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) - ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16) - ; CHECK: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32), [[SEXT2]](s32), [[SEXT3]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32), [[ASHR3]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_SEXT %0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -471,25 +471,38 @@ ; SI-LABEL: name: test_shl_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[AND1]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_shl_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV]], [[UV2]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[UV1]], [[UV3]](s16) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_shl_v2s16_v2s16 @@ -512,32 +525,42 @@ ; SI-LABEL: name: test_shl_v2s16_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[UV2]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[UV]](s32) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[UV3]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[UV1]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_shl_v2s16_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV]], [[UV2]](s32) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[UV1]], [[UV3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[UV]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[UV1]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_shl_v2s16_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[UV]](s32) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[UV1]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -646,26 +669,36 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV9]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[AND1]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV10]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT2]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV11]](s16) - ; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT3]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[AND3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) @@ -673,15 +706,28 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV2]], [[UV8]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[UV3]], [[UV9]](s16) - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[UV4]], [[UV10]](s16) - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[UV5]], [[UV11]](s16) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16), [[SHL2]](s16), [[SHL3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_shl_v4s16_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -274,25 +274,43 @@ ; SI-LABEL: name: test_smax_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) - ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT]], [[SEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32) - ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT2]], [[SEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) + ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[ASHR2]], [[ASHR3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_smax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[UV]], [[UV2]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[UV1]], [[UV3]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC2]] + ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_smax_v2s16 @@ -382,26 +400,47 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) - ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV8]](s16) - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT]], [[SEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32) - ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV9]](s16) - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT2]], [[SEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) + ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[ASHR2]], [[ASHR3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32) - ; SI: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16) - ; SI: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[UV10]](s16) - ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT4]], [[SEXT5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) + ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) + ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) + ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[ASHR4]], [[ASHR5]] ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX2]](s32) - ; SI: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16) - ; SI: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[UV11]](s16) - ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SEXT6]], [[SEXT7]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL6]], [[C]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) + ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) + ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[ASHR6]], [[ASHR7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) @@ -409,15 +448,28 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[UV2]], [[UV8]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[UV3]], [[UV9]] - ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[UV4]], [[UV10]] - ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[UV5]], [[UV11]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC4]] + ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] + ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] + ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16), [[SMAX2]](s16), [[SMAX3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_smax_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -274,25 +274,43 @@ ; SI-LABEL: name: test_smin_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) - ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT]], [[SEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32) - ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT2]], [[SEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) + ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[ASHR2]], [[ASHR3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_smin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[UV]], [[UV2]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[UV1]], [[UV3]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC2]] + ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_smin_v2s16 @@ -382,26 +400,47 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) - ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV8]](s16) - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT]], [[SEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32) - ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV9]](s16) - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT2]], [[SEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) + ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[ASHR2]], [[ASHR3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32) - ; SI: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16) - ; SI: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[UV10]](s16) - ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT4]], [[SEXT5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) + ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) + ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) + ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[ASHR4]], [[ASHR5]] ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN2]](s32) - ; SI: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16) - ; SI: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[UV11]](s16) - ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SEXT6]], [[SEXT7]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL6]], [[C]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) + ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) + ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[ASHR6]], [[ASHR7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) @@ -409,15 +448,28 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[UV2]], [[UV8]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[UV3]], [[UV9]] - ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[UV4]], [[UV10]] - ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[UV5]], [[UV11]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC4]] + ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] + ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] + ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16), [[SMIN2]](s16), [[SMIN3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_smin_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -119,34 +119,51 @@ ; GFX6-LABEL: name: test_sub_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]] + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]] ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32) - ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; GFX6: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[ANYEXT2]], [[ANYEXT3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY4]], [[COPY5]] ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB1]](s32) ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: test_sub_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[UV]], [[UV2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[UV1]], [[UV3]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SUB]](s16), [[SUB1]](s16) ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_sub_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[UV]], [[UV2]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[UV1]], [[UV3]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SUB]](s16), [[SUB1]](s16) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -260,25 +260,40 @@ ; SI-LABEL: name: test_umax_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT]], [[ZEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT2]], [[ZEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_umax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[UV]], [[UV2]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[UV1]], [[UV3]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC2]] + ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_umax_v2s16 @@ -368,26 +383,40 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s16) - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT]], [[ZEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV9]](s16) - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT2]], [[ZEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32) - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV10]](s16) - ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT4]], [[ZEXT5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX2]](s32) - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UV11]](s16) - ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT6]], [[ZEXT7]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) @@ -395,15 +424,28 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[UV2]], [[UV8]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[UV3]], [[UV9]] - ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[UV4]], [[UV10]] - ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[UV5]], [[UV11]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC4]] + ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] + ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] + ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16), [[UMAX2]](s16), [[UMAX3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_umax_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -260,25 +260,40 @@ ; SI-LABEL: name: test_umin_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT]], [[ZEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT2]], [[ZEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_umin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[UV]], [[UV2]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[UV1]], [[UV3]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC2]] + ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_umin_v2s16 @@ -368,26 +383,40 @@ ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; SI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s16) - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT]], [[ZEXT1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV9]](s16) - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT2]], [[ZEXT3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32) - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV10]](s16) - ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT4]], [[ZEXT5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN2]](s32) - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UV11]](s16) - ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT6]], [[ZEXT7]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) @@ -395,15 +424,28 @@ ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV6]](<2 x s16>) - ; VI: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV7]](<2 x s16>) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[UV2]], [[UV8]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[UV3]], [[UV9]] - ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[UV4]], [[UV10]] - ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[UV5]], [[UV11]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC4]] + ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] + ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] + ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16), [[UMIN2]](s16), [[UMIN3]](s16) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: test_umin_v4s16 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -39,11 +39,13 @@ liveins: $vgpr0 ; CHECK-LABEL: name: test_unmerge_s16_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK: $vgpr1 = COPY [[COPY2]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0 %3:_(s32) = G_ANYEXT %1 @@ -84,16 +86,19 @@ ; CHECK-LABEL: name: test_unmerge_s16_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK: $vgpr1 = COPY [[COPY2]](s32) + ; CHECK: $vgpr2 = COPY [[COPY3]](s32) + ; CHECK: $vgpr3 = COPY [[COPY4]](s32) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0 %5:_(s32) = G_ANYEXT %1 @@ -114,21 +119,25 @@ ; CHECK-LABEL: name: test_unmerge_s16_v6s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; CHECK: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; CHECK: [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV2]](<2 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) - ; CHECK: $vgpr4 = COPY [[ANYEXT4]](s32) - ; CHECK: $vgpr5 = COPY [[ANYEXT5]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK: $vgpr1 = COPY [[COPY2]](s32) + ; CHECK: $vgpr2 = COPY [[COPY3]](s32) + ; CHECK: $vgpr3 = COPY [[COPY4]](s32) + ; CHECK: $vgpr4 = COPY [[COPY5]](s32) + ; CHECK: $vgpr5 = COPY [[COPY6]](s32) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0 %7:_(s32) = G_ANYEXT %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -86,10 +86,15 @@ ; CHECK-LABEL: name: test_zext_v2s16_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = G_ZEXT %0 @@ -126,13 +131,21 @@ ; CHECK-LABEL: name: test_zext_v4s16_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_ZEXT %0