Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -69,6 +69,7 @@
   bool selectG_EXTRACT(MachineInstr &I) const;
   bool selectG_GEP(MachineInstr &I) const;
   bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
+  bool selectG_INSERT(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
                                         CodeGenCoverage &CoverageInfo) const;
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -204,6 +204,48 @@
   return true;
 }
 
+/// Select G_INSERT by lowering it to INSERT_SUBREG.
+///
+/// The bit offset is mapped to a single 32-bit channel sub-register, so only
+/// a 32-bit value inserted at a 32-bit aligned offset can be selected here.
+/// Any other form is rejected instead of silently picking a wrong subreg.
+bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  const int64_t Offset = I.getOperand(3).getImm();
+  const unsigned InsSize =
+      MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+  if (Offset % 32 != 0 || InsSize != 32)
+    return false;
+
+  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
+  DebugLoc DL = I.getDebugLoc();
+  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
+                               .addDef(I.getOperand(0).getReg())
+                               .addReg(I.getOperand(1).getReg())
+                               .addReg(I.getOperand(2).getReg())
+                               .addImm(SubReg);
+
+  // Constrain every virtual register operand of the new instruction to the
+  // register class reported for it, if there is one.
+  for (const MachineOperand &MO : Ins->operands()) {
+    if (!MO.isReg())
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+      continue;
+
+    const TargetRegisterClass *RC =
+            TRI.getConstrainedRegClassForOperand(MO, MRI);
+    if (!RC)
+      continue;
+    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
+  }
+  I.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                           CodeGenCoverage &CoverageInfo) const {
   unsigned IntrinsicID = I.getOperand(1).getIntrinsicID();
@@ -542,6 +584,8 @@
     return selectG_GEP(I);
   case TargetOpcode::G_IMPLICIT_DEF:
     return selectG_IMPLICIT_DEF(I);
+  case TargetOpcode::G_INSERT:
+    return selectG_INSERT(I);
   case TargetOpcode::G_INTRINSIC:
     return selectG_INTRINSIC(I, CoverageInfo);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
@@ -0,0 +1,49 @@
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
+---
+
+name: insert512
+legalized: true
+regBankSelected: true
+
+# CHECK-LABEL: insert512
+# CHECK: [[BASE:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
+# CHECK: [[VAL:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+# CHECK: [[BASE0:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE]], [[VAL]], %subreg.sub0
+# CHECK: [[BASE1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE0]], [[VAL]], %subreg.sub1
+# CHECK: [[BASE2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE1]], [[VAL]], %subreg.sub2
+# CHECK: [[BASE3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE2]], [[VAL]], %subreg.sub3
+# CHECK: [[BASE4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE3]], [[VAL]], %subreg.sub4
+# CHECK: [[BASE5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE4]], [[VAL]], %subreg.sub5
+# CHECK: [[BASE6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE5]], [[VAL]], %subreg.sub6
+# CHECK: [[BASE7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE6]], [[VAL]], %subreg.sub7
+# CHECK: [[BASE8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE7]], [[VAL]], %subreg.sub8
+# CHECK: [[BASE9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE8]], [[VAL]], %subreg.sub9
+# CHECK: [[BASE10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE9]], [[VAL]], %subreg.sub10
+# CHECK: [[BASE11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE10]], [[VAL]], %subreg.sub11
+# CHECK: [[BASE12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE11]], [[VAL]], %subreg.sub12
+# CHECK: [[BASE13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE12]], [[VAL]], %subreg.sub13
+# CHECK: [[BASE14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE13]], [[VAL]], %subreg.sub14
+# CHECK: [[BASE15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE14]], [[VAL]], %subreg.sub15
+
+body: |
+  bb.0:
+    %0:sgpr(s512) = G_IMPLICIT_DEF
+    %1:sgpr(s32) = G_IMPLICIT_DEF
+    %2:sgpr(s512) = G_INSERT %0:sgpr, %1:sgpr(s32), 0
+    %3:sgpr(s512) = G_INSERT %2:sgpr, %1:sgpr(s32), 32
+    %4:sgpr(s512) = G_INSERT %3:sgpr, %1:sgpr(s32), 64
+    %5:sgpr(s512) = G_INSERT %4:sgpr, %1:sgpr(s32), 96
+    %6:sgpr(s512) = G_INSERT %5:sgpr, %1:sgpr(s32), 128
+    %7:sgpr(s512) = G_INSERT %6:sgpr, %1:sgpr(s32), 160
+    %8:sgpr(s512) = G_INSERT %7:sgpr, %1:sgpr(s32), 192
+    %9:sgpr(s512) = G_INSERT %8:sgpr, %1:sgpr(s32), 224
+    %10:sgpr(s512) = G_INSERT %9:sgpr, %1:sgpr(s32), 256
+    %11:sgpr(s512) = G_INSERT %10:sgpr, %1:sgpr(s32), 288
+    %12:sgpr(s512) = G_INSERT %11:sgpr, %1:sgpr(s32), 320
+    %13:sgpr(s512) = G_INSERT %12:sgpr, %1:sgpr(s32), 352
+    %14:sgpr(s512) = G_INSERT %13:sgpr, %1:sgpr(s32), 384
+    %15:sgpr(s512) = G_INSERT %14:sgpr, %1:sgpr(s32), 416
+    %16:sgpr(s512) = G_INSERT %15:sgpr, %1:sgpr(s32), 448
+    %17:sgpr(s512) = G_INSERT %16:sgpr, %1:sgpr(s32), 480
+    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %17:sgpr(s512)
+    SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15