Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1852,12 +1852,33 @@ if (!DstTy.isScalar()) return false; - if (I.getOpcode() == AMDGPU::G_ANYEXT) - return selectCOPY(I); - // Artifact casts should never use vcc. const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI); + // FIXME: This should probably be illegal and split earlier. + if (I.getOpcode() == AMDGPU::G_ANYEXT) { + if (DstSize <= 32) + return selectCOPY(I); + + const TargetRegisterClass *SrcRC = + TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank, *MRI); + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); + const TargetRegisterClass *DstRC = + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + + Register UndefReg = MRI->createVirtualRegister(SrcRC); + BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); + BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(SrcReg) + .addImm(AMDGPU::sub0) + .addReg(UndefReg) + .addImm(AMDGPU::sub1); + I.eraseFromParent(); + + return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) && + RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI); + } + if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) { // 64-bit should have been split up in RegBankSelect Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -22,22 +22,88 @@ ... --- +name: anyext_sgpr_s32_to_sgpr_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: anyext_sgpr_s32_to_sgpr_s64 + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s64) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 + +... -name: anyext_sgpr_s16_to_sgpr_s64 +--- +name: anyext_sgpr_s16_to_sgpr_s64 legalized: true regBankSelected: true -body: | +tracksRegLiveness: true +body: | bb.0: liveins: $sgpr0 ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s64 + ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]] - ; GCN: $sgpr0_sgpr1 = COPY [[COPY1]] + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_ANYEXT %1 - $sgpr0_sgpr1 = COPY %2 + S_ENDPGM 0, implicit %2 + +... + +--- +name: anyext_vgpr_s32_to_vgpr_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: anyext_vgpr_s32_to_vgpr_s64 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s64) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 + +... + +--- +name: anyext_vgpr_s16_to_vgpr_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s64 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0 + %2:vgpr(s64) = G_ANYEXT %1 + S_ENDPGM 0, implicit %2 ...