Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1497,7 +1497,8 @@ } bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { - bool Signed = I.getOpcode() == AMDGPU::G_SEXT; + bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG; + bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg; const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock &MBB = *I.getParent(); const Register DstReg = I.getOperand(0).getReg(); @@ -1505,7 +1506,8 @@ const LLT DstTy = MRI->getType(DstReg); const LLT SrcTy = MRI->getType(SrcReg); - const unsigned SrcSize = SrcTy.getSizeInBits(); + const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ? + I.getOperand(2).getImm() : SrcTy.getSizeInBits(); const unsigned DstSize = DstTy.getSizeInBits(); if (!DstTy.isScalar()) return false; @@ -1541,7 +1543,9 @@ } if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { - if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI)) + const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ? + AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass; + if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI)) return false; if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { @@ -1557,13 +1561,15 @@ const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. - if (DstSize > 32 && SrcSize <= 32) { + if (DstSize > 32 && (SrcSize <= 32 || InReg)) { // We need a 64-bit register source, but the high bits don't matter. Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + unsigned SubReg = InReg ? 
AMDGPU::sub0 : 0; + BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) - .addReg(SrcReg) + .addReg(SrcReg, 0, SubReg) .addImm(AMDGPU::sub0) .addReg(UndefReg) .addImm(AMDGPU::sub1); @@ -2122,6 +2128,7 @@ case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT_INREG: if (selectImpl(I, *CoverageInfo)) return true; return selectG_SZA_EXT(I); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir @@ -0,0 +1,323 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck -check-prefix=GCN %s + +--- + +name: sext_inreg_sgpr_s32_1 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: sext_inreg_sgpr_s32_1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_BFE_I32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_SEXT_INREG %0, 1 + $sgpr0 = COPY %1 +... + +--- + +name: sext_inreg_sgpr_s32_2 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: sext_inreg_sgpr_s32_2 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 131072, implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_BFE_I32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_SEXT_INREG %0, 2 + $sgpr0 = COPY %1 +... 
+ +--- + +name: sext_inreg_sgpr_s32_8 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: sext_inreg_sgpr_s32_8 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I8 [[COPY]] + ; GCN: $sgpr0 = COPY [[S_SEXT_I32_I8_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_SEXT_INREG %0, 8 + $sgpr0 = COPY %1 +... + +--- + +name: sext_inreg_sgpr_s32_16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: sext_inreg_sgpr_s32_16 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] + ; GCN: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_SEXT_INREG %0, 16 + $sgpr0 = COPY %1 +... + +--- + +name: sext_inreg_sgpr_s32_31 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: sext_inreg_sgpr_s32_31 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 2031616, implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_BFE_I32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_SEXT_INREG %0, 31 + $sgpr0 = COPY %1 +... + +--- + +name: sext_inreg_sgpr_s64_1 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: sext_inreg_sgpr_s64_1 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc + ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_SEXT_INREG %0, 1 + $sgpr0_sgpr1 = COPY %1 +... 
+ +--- + +name: sext_inreg_sgpr_s64_2 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: sext_inreg_sgpr_s64_2 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 131072, implicit-def $scc + ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_SEXT_INREG %0, 2 + $sgpr0_sgpr1 = COPY %1 +... + +--- + +name: sext_inreg_sgpr_s64_8 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: sext_inreg_sgpr_s64_8 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 524288, implicit-def $scc + ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_SEXT_INREG %0, 8 + $sgpr0_sgpr1 = COPY %1 +... + +--- + +name: sext_inreg_sgpr_s64_16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: sext_inreg_sgpr_s64_16 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc + ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_SEXT_INREG %0, 16 + $sgpr0_sgpr1 = COPY %1 +... 
+ +--- + +name: sext_inreg_sgpr_s64_31 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: sext_inreg_sgpr_s64_31 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2031616, implicit-def $scc + ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_SEXT_INREG %0, 31 + $sgpr0_sgpr1 = COPY %1 +... + +# Ideally this degenerate case would have been replaced with a 32-bit shift by combines. +--- + +name: sext_inreg_sgpr_s64_32 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: sext_inreg_sgpr_s64_32 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2097152, implicit-def $scc + ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_SEXT_INREG %0, 32 + $sgpr0_sgpr1 = COPY %1 +... + +--- + +name: sext_inreg_sgpr_s64_63 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: sext_inreg_sgpr_s64_63 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 4128768, implicit-def $scc + ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_SEXT_INREG %0, 63 + $sgpr0_sgpr1 = COPY %1 +...
+ +--- + +name: sext_inreg_vgpr_s32_1 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_inreg_vgpr_s32_1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 1, implicit $exec + ; GCN: $vgpr0 = COPY [[V_BFE_I32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_SEXT_INREG %0, 1 + $vgpr0 = COPY %1 +... + +--- + +name: sext_inreg_vgpr_s32_2 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_inreg_vgpr_s32_2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 2, implicit $exec + ; GCN: $vgpr0 = COPY [[V_BFE_I32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_SEXT_INREG %0, 2 + $vgpr0 = COPY %1 +... + +--- + +name: sext_inreg_vgpr_s32_8 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_inreg_vgpr_s32_8 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 8, implicit $exec + ; GCN: $vgpr0 = COPY [[V_BFE_I32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_SEXT_INREG %0, 8 + $vgpr0 = COPY %1 +... + +--- + +name: sext_inreg_vgpr_s32_16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_inreg_vgpr_s32_16 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 16, implicit $exec + ; GCN: $vgpr0 = COPY [[V_BFE_I32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_SEXT_INREG %0, 16 + $vgpr0 = COPY %1 +... 
+ +--- + +name: sext_inreg_vgpr_s32_31 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_inreg_vgpr_s32_31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 31, implicit $exec + ; GCN: $vgpr0 = COPY [[V_BFE_I32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_SEXT_INREG %0, 31 + $vgpr0 = COPY %1 +...