Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1036,19 +1036,53 @@
   case AMDGPU::G_ZEXT: {
     unsigned SrcReg = MI.getOperand(1).getReg();
     LLT SrcTy = MRI.getType(SrcReg);
-    if (SrcTy != LLT::scalar(1))
-      return;
+    bool Signed = Opc == AMDGPU::G_SEXT;
 
     MachineIRBuilder B(MI);
-    bool Signed = Opc == AMDGPU::G_SEXT;
+    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+
     unsigned DstReg = MI.getOperand(0).getReg();
     LLT DstTy = MRI.getType(DstReg);
 
-    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
-    if (SrcBank->getID() == AMDGPU::SCCRegBankID ||
-        SrcBank->getID() == AMDGPU::VCCRegBankID) {
-      const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
-      unsigned DstSize = DstTy.getSizeInBits();
+    if (DstTy.isScalar() &&
+        SrcBank != &AMDGPU::SGPRRegBank &&
+        SrcBank != &AMDGPU::SCCRegBank &&
+        SrcBank != &AMDGPU::VCCRegBank &&
+        // FIXME: Should handle any type that rounds to s64 when irregular
+        // breakdowns are supported.
+        DstTy.getSizeInBits() == 64 &&
+        SrcTy.getSizeInBits() <= 32) {
+      const LLT S32 = LLT::scalar(32);
+      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+      // Extend to 32-bit, and then extend the low half.
+      if (Signed) {
+        // TODO: Should really be buildSExtOrCopy
+        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
+
+        // Replicate sign bit from 32-bit extended part.
+        auto ShiftAmt = B.buildConstant(S32, 31);
+        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
+      } else {
+        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
+        B.buildConstant(DefRegs[1], 0);
+      }
+      MRI.setRegBank(DstReg, *SrcBank);
+      MI.eraseFromParent();
+      return;
+    }
+
+    if (SrcTy != LLT::scalar(1))
+      return;
+
+    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
+      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
+        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
+
+      unsigned DstSize = DstTy.getSizeInBits();
       // 64-bit select is SGPR only
       const bool UseSel64 = DstSize > 32 &&
         SrcBank->getID() == AMDGPU::SCCRegBankID;
@@ -1060,10 +1094,11 @@
       MRI.setRegBank(True.getReg(0), *DstBank);
       MRI.setRegBank(False.getReg(0), *DstBank);
 
+      MRI.setRegBank(DstReg, *DstBank);
+
       if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
-        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
-        MRI.setRegBank(Sel.getReg(0), *DstBank);
-        B.buildMerge(DstReg, { Sel.getReg(0), Sel.getReg(0) });
+        B.buildSelect(DefRegs[0], SrcReg, True, False);
+        B.buildCopy(DefRegs[1], DefRegs[0]);
       } else if (DstSize < 32) {
         auto Sel = B.buildSelect(SelType, SrcReg, True, False);
         MRI.setRegBank(Sel.getReg(0), *DstBank);
@@ -1606,8 +1641,17 @@
       break;
     }
 
-    OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
-    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+    // TODO: Should anyext be split into 32-bit part as well?
+    if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
+      OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
+      OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+    } else {
+      // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+      // 32-bits, and then to 64.
+      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+      OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+                                                         SrcSize);
+    }
     break;
   }
   case AMDGPU::G_FCMP: {
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
@@ -16,6 +16,22 @@
     %1:_(s64) = G_SEXT %0
 ...
 
+---
+name: sext_s16_to_s64_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: sext_s16_to_s64_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s16)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_SEXT %1
+...
+
 ---
 name: sext_s32_to_s64_v
 legalized: true
@@ -25,7 +41,10 @@
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: sext_s32_to_s64_v
    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s64) = G_SEXT [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_SEXT %0
 ...
@@ -146,7 +165,8 @@
     ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
     ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
-    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +278,30 @@
     ; CHECK-LABEL: name: sext_s1_to_s64_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s64) = G_SEXT %1
 ...
+
+---
+name: sext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: sext_s16_to_s64_vgpr
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s16)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_SEXT %1
+...
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -16,6 +16,22 @@
     %1:_(s64) = G_ZEXT %0
 ...
 
+---
+name: zext_s16_to_s64_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: zext_s16_to_s64_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s16)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_ZEXT %1
+...
+
 ---
 name: zext_s32_to_s64_v
 legalized: true
@@ -25,7 +41,9 @@
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: zext_s32_to_s64_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_ZEXT %0
 ...
@@ -146,7 +164,8 @@
     ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
     ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
-    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +277,28 @@
     ; CHECK-LABEL: name: zext_s1_to_s64_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s64) = G_ZEXT %1
 ...
+
+---
+name: zext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: zext_s16_to_s64_vgpr
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_ZEXT %1
+...
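For reference, the VGPR lowering the new CHECK lines verify reduces to a simple identity: the high half of a 64-bit sign extension is the 32-bit extension shifted right arithmetically by 31, and the high half of a 64-bit zero extension is zero. A minimal standalone C++ sketch of this identity (helper names invented for illustration; it mirrors, but is not part of, the patch):

#include <cassert>
#include <cstdint>

// sext64(x) = merge(Lo, Hi) where Lo = sext32(x) and Hi = Lo >> 31
// (arithmetic shift), matching the G_SEXT s32 / G_ASHR 31 /
// G_MERGE_VALUES sequence the patch emits.
static uint64_t sext64ViaHalves(int16_t X) {
  uint32_t Lo = static_cast<uint32_t>(static_cast<int32_t>(X));
  // Right shift of a negative int is the two's complement arithmetic
  // shift on mainstream targets (guaranteed from C++20 on).
  uint32_t Hi = static_cast<uint32_t>(static_cast<int32_t>(Lo) >> 31);
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

// zext64(x) = merge(Lo, 0) where Lo = zext32(x), matching the
// G_ZEXT s32 / G_CONSTANT 0 / G_MERGE_VALUES sequence.
static uint64_t zext64ViaHalves(uint16_t X) {
  uint32_t Lo = X;
  return static_cast<uint64_t>(Lo); // high 32 bits are zero
}

int main() {
  assert(sext64ViaHalves(-2) == 0xFFFFFFFFFFFFFFFEull);
  assert(sext64ViaHalves(7) == 7u);
  assert(zext64ViaHalves(0x8000) == 0x8000u);
  return 0;
}

Splitting the value into two 32-bit halves lets the VGPR path avoid 64-bit shift operations entirely, while the SGPR path keeps the s64 extend intact because, as the new comment in the mapping code notes, scalar code can use a 64-bit BFE directly.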