Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1835,53 +1835,39 @@
     return;
   }
   case AMDGPU::G_SEXT_INREG: {
-    const RegisterBank *SrcBank =
-      OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
-
-    // We can directly handle all 64-bit cases with s_bfe_i64.
-    if (SrcBank == &AMDGPU::SGPRRegBank)
-      break;
+    SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
+    if (SrcRegs.empty())
+      break; // Nothing to repair
 
     const LLT S32 = LLT::scalar(32);
-    Register DstReg = MI.getOperand(0).getReg();
-    Register SrcReg = MI.getOperand(1).getReg();
-    LLT Ty = MRI.getType(DstReg);
-    if (Ty == S32)
-      break;
-
     MachineIRBuilder B(MI);
     ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
     GISelObserverWrapper Observer(&O);
     B.setChangeObserver(Observer);
 
+    // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
+    // we would need to further expand, and doesn't let us directly set the
+    // result registers.
+    SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
+
     int Amt = MI.getOperand(2).getImm();
     if (Amt <= 32) {
-      // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
-      // we would need to further expand, and doesn't let us directly set the
-      // result registers.
-      SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
-      SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
-
-      if (SrcRegs.empty())
-        split64BitValueForMapping(B, SrcRegs, S32, SrcReg);
-      // Extend in the low bits and propagate the sign bit to the high half.
-      auto ShiftAmt = B.buildConstant(S32, 31);
       if (Amt == 32) {
+        // The low bits are unchanged.
         B.buildCopy(DstRegs[0], SrcRegs[0]);
-        B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
       } else {
+        // Extend in the low bits and propagate the sign bit to the high half.
         B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt);
-        B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
       }
+
+      B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
     } else {
-      assert(empty(OpdMapper.getVRegs(0)) && empty(OpdMapper.getVRegs(1)));
-      const LLT S64 = LLT::scalar(64);
-      // This straddles two registers. Expand with 64-bit shifts.
-      auto ShiftAmt = B.buildConstant(S32, 64 - Amt);
-      auto Shl = B.buildShl(S64, SrcReg, ShiftAmt);
-      B.buildAShr(DstReg, Shl, ShiftAmt);
+      // The low bits are unchanged, and extend in the high bits.
+      B.buildCopy(DstRegs[0], SrcRegs[0]);
+      B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
     }
 
+    Register DstReg = MI.getOperand(0).getReg();
     MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
     MI.eraseFromParent();
    return;
@@ -2954,7 +2940,8 @@
   }
   case AMDGPU::G_ZEXT:
   case AMDGPU::G_SEXT:
-  case AMDGPU::G_ANYEXT: {
+  case AMDGPU::G_ANYEXT:
+  case AMDGPU::G_SEXT_INREG: {
     Register Dst = MI.getOperand(0).getReg();
     Register Src = MI.getOperand(1).getReg();
     unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -2985,24 +2972,6 @@
     }
     break;
   }
-  case AMDGPU::G_SEXT_INREG: {
-    Register Dst = MI.getOperand(0).getReg();
-    Register Src = MI.getOperand(1).getReg();
-    Register Amt = MI.getOperand(2).getImm();
-    unsigned Size = getSizeInBits(Dst, MRI, *TRI);
-    unsigned BankID = getRegBank(Src, MRI, *TRI)->getID();
-
-    if (Amt <= 32) {
-      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
-    } else {
-      // If we need to expand a 64 bit for the VALU, this will straddle two
-      // registers. Just expand this with 64-bit shifts.
-      OpdsMapping[0] = AMDGPU::getValueMapping(BankID, Size);
-    }
-
-    OpdsMapping[1] = OpdsMapping[0];
-    break;
-  }
   case AMDGPU::G_FCMP: {
     unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
@@ -121,8 +121,8 @@
     ; CHECK-LABEL: name: sext_inreg_v_s64_1
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
     ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
     ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -143,8 +143,8 @@
     ; CHECK-LABEL: name: sext_inreg_v_s64_31
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
     ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
     ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -165,8 +165,8 @@
     ; CHECK-LABEL: name: sext_inreg_v_s64_32
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
     ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -186,12 +186,55 @@
 
     ; CHECK-LABEL: name: sext_inreg_v_s64_33
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[COPY]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[ASHR]](s64)
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXT_INREG %0, 33
     S_ENDPGM 0, implicit %1
 
 ...
+
+---
+name: sext_inreg_v_s64_35
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_v_s64_35
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_SEXT_INREG %0, 35
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: sext_inreg_v_s64_63
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_v_s64_63
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_SEXT_INREG %0, 63
+    S_ENDPGM 0, implicit %1
+
+...