Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2066,12 +2066,40 @@
     if (Src2->isReg() && Src2->getReg() == Reg) {
       // Not allowed to use constant bus for another operand.
       // We can however allow an inline immediate as src0.
-      if (!Src0->isImm() &&
-          (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
-        return false;
+      bool Src0Inlined = false;
+      if (Src0->isReg()) {
+        // Try to inline the constant if possible.
+        // If Def is a move of an inline constant and this is its only use,
+        // folding the immediate into Src0 here saves a VGPR.
+        MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
+        if (Def && Def->isMoveImmediate() &&
+            isInlineConstant(Def->getOperand(1)) &&
+            MRI->hasOneUse(Src0->getReg())) {
+          Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+          Src0Inlined = true;
+        } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
+                    RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
+                   (RI.isVirtualRegister(Src0->getReg()) &&
+                    RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+          return false;
+        // A VGPR is okay as Src0 - fall through.
+      }
 
-      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
-        return false;
+      if (Src1->isReg() && !Src0Inlined ) {
+        // The one inline-constant slot is still free - try to fill it.
+        MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
+        if (Def && Def->isMoveImmediate() &&
+            isInlineConstant(Def->getOperand(1)) &&
+            MRI->hasOneUse(Src1->getReg()) &&
+            commuteInstruction(UseMI)) {
+          Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+        } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
+                    RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+                   (RI.isVirtualRegister(Src1->getReg()) &&
+                    RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+          return false;
+        // A VGPR is okay as Src1 - fall through.
+      }
 
       const int64_t Imm = ImmOp->getImm();
 
Index: test/CodeGen/AMDGPU/madak-inline-constant.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/madak-inline-constant.mir
@@ -0,0 +1,185 @@
+# RUN: llc -march=amdgcn -run-pass peephole-opt -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192
+# GCN: S_MOV_B32 1082130432
+# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
+
+---
+name: test src1-inlined
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %18:sreg_32 = S_MOV_B32 1082130432
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $exec
+
+...
+
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192
+# GCN: S_MOV_B32 1082130432
+# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
+
+---
+name: test src0-inlined
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %18:sreg_32 = S_MOV_B32 1082130432
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $exec
+
+...
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192
+# GCN: S_MOV_B32 1082130432
+# GCN: %3:vgpr_32 = V_MADAK_F32 killed %0, killed %0, 1092616192, implicit $exec
+
+---
+name: test none-inlined
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %18:sreg_32 = S_MOV_B32 1082130432
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %0, 0, %17, 0, 0, implicit $exec
+
+...
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192
+# GCN: V_MOV_B32_e32 1082130432
+# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
+
+---
+name: test src1-2vgprs-inlined
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %18:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $exec
+
+...
+
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192
+# GCN: V_MOV_B32_e32 1082130432
+# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
+
+---
+name: test src0-2vgprs-inlined
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %18:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $exec
+
+...
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192, implicit $exec
+# GCN: S_MOV_B32 1082130432
+# GCN: V_MADAK_F32 1082130432, killed $vgpr1, 1092616192, implicit $exec
+
+---
+name: test src0-phys-vgpr
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+
+    $vgpr1 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %18:sgpr_32 = S_MOV_B32 1082130432
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed $vgpr1, 0, killed %18, 0, %17, 0, 0, implicit $exec
+
+...
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192, implicit $exec
+# GCN: S_MOV_B32 1082130432
+# GCN: V_MADAK_F32 1082130432, killed $vgpr0, 1092616192, implicit $exec
+
+---
+name: test src1-phys-vgpr
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %18:sgpr_32 = S_MOV_B32 1082130432
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed $vgpr0, 0, %17, 0, 0, implicit $exec
+
+...
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192, implicit $exec
+# GCN: V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %1, 0, 0, implicit $exec
+
+---
+name: test src0-phys-sgpr
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2
+
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %17, 0, 0, implicit $exec
+
+...
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192, implicit $exec
+# GCN: V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %1, 0, 0, implicit $exec
+
+---
+name: test src1-phys-sgpr
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2
+
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $exec
+
+...
+
+# GCN-LABEL: bb.0:
+# GCN: V_MOV_B32_e32 1092616192, implicit $exec
+# GCN: $sgpr2 = S_MOV_B32 1082130432
+# GCN: V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
+
+---
+name: test src1-phys-sgpr-move
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+
+    %0:vgpr_32 = COPY $vgpr0
+    %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    $sgpr2 = S_MOV_B32 1082130432
+    %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $exec
+
+...