Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -76,6 +76,7 @@
   bool selectG_ADD(MachineInstr &I) const;
   bool selectG_EXTRACT(MachineInstr &I) const;
   bool selectG_MERGE_VALUES(MachineInstr &I) const;
+  bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
   bool selectG_GEP(MachineInstr &I) const;
   bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
   bool selectG_INSERT(MachineInstr &I) const;
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -361,6 +361,65 @@
   return true;
 }
 
+/// Select G_UNMERGE_VALUES by emitting one COPY per destination that reads the
+/// matching subregister of the source, then erasing the generic instruction.
+///
+/// \returns false if the source or a destination register cannot be
+/// constrained to a register class, or if fewer subregister indices than
+/// destinations are available; true once \p MI has been replaced.
+bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
+  MachineBasicBlock *BB = MI.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  // Every operand but the last is a destination; the last one is the source.
+  const int NumDst = MI.getNumOperands() - 1;
+
+  MachineOperand &Src = MI.getOperand(NumDst);
+
+  Register SrcReg = Src.getReg();
+  Register DstReg0 = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg0);
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  const unsigned DstSize = DstTy.getSizeInBits();
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+  const DebugLoc &DL = MI.getDebugLoc();
+  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+
+  const TargetRegisterClass *SrcRC =
+      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
+  assert(SrcRC);
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+    return false;
+
+  // Forward an undef flag on the source operand to every COPY's source use.
+  const unsigned SrcFlags = getUndefRegState(Src.isUndef());
+
+  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
+  // source, and this relies on the fact that the same subregister indices are
+  // used for both.
+  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
+
+  // SubRegs is indexed once per destination below; fail selection instead of
+  // reading past its end if the split yielded too few parts.
+  if (SubRegs.size() < static_cast<size_t>(NumDst))
+    return false;
+
+  for (int I = 0, E = NumDst; I != E; ++I) {
+    MachineOperand &Dst = MI.getOperand(I);
+    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
+        .addReg(SrcReg, SrcFlags, SubRegs[I]);
+
+    const TargetRegisterClass *DstRC =
+        TRI.getConstrainedRegClassForOperand(Dst, MRI);
+    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
+      return false;
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
   return selectG_ADD(I);
 }
@@ -1184,6 +1243,8 @@
   case TargetOpcode::G_MERGE_VALUES:
   case TargetOpcode::G_CONCAT_VECTORS:
     return selectG_MERGE_VALUES(I);
+  case TargetOpcode::G_UNMERGE_VALUES:
+    return selectG_UNMERGE_VALUES(I);
   case TargetOpcode::G_GEP:
     return selectG_GEP(I);
   case TargetOpcode::G_IMPLICIT_DEF:
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
@@ -0,0 +1,216 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s 2> %t | FileCheck -check-prefix=GCN %s
+# RUN: FileCheck -check-prefix=ERR -allow-empty %s < %t
+
+# Every function in this file has GCN checks for a selected result, so no missed-selection remark may appear.
+# ERR-NOT: remark:
+
+# VGPR s64 source split into two VGPR s32 pieces (sub0, sub1).
+---
+name:            test_unmerge_values_v_s32_v_s32_v_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GCN-LABEL: name: test_unmerge_values_v_s32_v_s32_v_s64
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2
+...
+# SGPR s64 source split into two SGPR s32 pieces (sub0, sub1).
+---
+name:            test_unmerge_values_s_s32_s_s32_s_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s_s64
+    ; GCN: liveins: $sgpr0_sgpr1
+    ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32), %2:sgpr(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2
+...
+# SGPR s64 source; sub0 is copied to a VGPR and sub1 to an SGPR.
+---
+name:            test_unmerge_values_v_s32_s_s32_s_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; GCN-LABEL: name: test_unmerge_values_v_s32_s_s32_s_s64
+    ; GCN: liveins: $sgpr0_sgpr1
+    ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32), %2:sgpr(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2
+...
+# SGPR s64 source; sub0 is copied to an SGPR and sub1 to a VGPR.
+---
+name:            test_unmerge_values_s_s32_v_s32_s_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; GCN-LABEL: name: test_unmerge_values_s_s32_v_s32_s_s64
+    ; GCN: liveins: $sgpr0_sgpr1
+    ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2
+...
+# Undef SGPR s64 source; both checked COPYs keep the undef flag on the source.
+---
+name:            test_unmerge_values_s_s32_v_s32_s_s64_undef_src
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: test_unmerge_values_s_s32_v_s32_s_s64_undef_src
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY undef %2.sub0:sreg_64_xexec
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY undef %2.sub1:sreg_64_xexec
+    ; GCN: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]]
+    %1:sgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES undef %0:sgpr(s64)
+    S_ENDPGM 0, implicit %1, implicit %2
+...
+# SGPR s96 source split into three SGPR s32 pieces (sub0, sub1, sub2).
+---
+name:            test_unmerge_values_s_s32_s_s32_s_s32_s_s96
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2
+
+    ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s_s32_s_s96
+    ; GCN: liveins: $sgpr0_sgpr1_sgpr2
+    ; GCN: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub2
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]]
+    %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
+    %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2, implicit %3
+...
+# SGPR s128 source split into four SGPR s32 pieces (sub0 through sub3).
+---
+name:            test_unmerge_values_s_s32_s_s32_s_s32_s_s32_s_s128
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+    ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s_s32_s_s32_s_s128
+    ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub2
+    ; GCN: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub3
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+    %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32), %4:sgpr(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4
+...
+# SGPR s128 source split into two SGPR s64 pieces (sub0_sub1, sub2_sub3).
+---
+name:            test_unmerge_values_s_s64_s_s64_s_s128
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+    ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s_s128
+    ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]].sub0_sub1
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]].sub2_sub3
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
+    %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s64), %2:sgpr(s64) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2
+...
+# Destinations are written with the vgpr_32 register class instead of only a bank.
+---
+name:            test_unmerge_values_rc_set_def_v_s32_v_s32_v_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GCN-LABEL: name: test_unmerge_values_rc_set_def_v_s32_v_s32_v_s64
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr_32(s32), %2:vgpr_32(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2
+...
+# Source is written with the vreg_64 register class instead of only a bank.
+---
+name:            test_unmerge_values_rc_set_use_v_s32_v_s32_v_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GCN-LABEL: name: test_unmerge_values_rc_set_use_v_s32_v_s32_v_s64
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
+    %0:vreg_64(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0
+    S_ENDPGM 0, implicit %1, implicit %2
+...