Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2083,6 +2083,27 @@
   return false;
 }
 
+/// Look through the register in \p MO for an immediate that can be folded.
+///
+/// Returns the immediate source of the S_MOV_B32 / V_MOV_B32_e32 reported by
+/// MachineRegisterInfo::getUniqueVRegDef for that register. Returns 0 when
+/// \p MO is not a register or no such defining move exists, so a move of the
+/// literal 0 is indistinguishable from "nothing to fold". \p MO must be
+/// non-null and attached to an instruction inside a function, because the
+/// parent chain is dereferenced unconditionally.
+static int64_t getFoldableImm(const MachineOperand* MO) {
+  if (!MO->isReg())
+    return 0;
+  const MachineFunction *MF = MO->getParent()->getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
+  auto Def = MRI.getUniqueVRegDef(MO->getReg());
+  if (Def && (Def->getOpcode() == AMDGPU::S_MOV_B32 ||
+              Def->getOpcode() == AMDGPU::V_MOV_B32_e32) &&
+      Def->getOperand(1).isImm())
+    return Def->getOperand(1).getImm();
+  return 0;
+}
+
 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                  MachineInstr &MI,
                                                  LiveVariables *LV) const {
@@ -2120,6 +2141,35 @@
   const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
   const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
 
+  if (!Src0Mods && !Src1Mods && !Clamp && !Omod) {
+    if (auto Imm = getFoldableImm(Src2)) {
+      return BuildMI(*MBB, MI, MI.getDebugLoc(),
+                     get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
+               .add(*Dst)
+               .add(*Src0)
+               .add(*Src1)
+               .addImm(Imm);
+    }
+    if (auto Imm = getFoldableImm(Src1)) {
+      return BuildMI(*MBB, MI, MI.getDebugLoc(),
+                     get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
+               .add(*Dst)
+               .add(*Src0)
+               .addImm(Imm)
+               .add(*Src2);
+    }
+    if (auto Imm = getFoldableImm(Src0)) {
+      if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
+                           AMDGPU::OpName::src0), Src1))
+        return BuildMI(*MBB, MI, MI.getDebugLoc(),
+                       get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
+                 .add(*Dst)
+                 .add(*Src1)
+                 .addImm(Imm)
+                 .add(*Src2);
+    }
+  }
+
   return BuildMI(*MBB, MI, MI.getDebugLoc(),
                  get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
       .add(*Dst)
Index: llvm/trunk/test/CodeGen/AMDGPU/madak.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madak.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll
@@ -34,7 +34,7 @@
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]]
+; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
Index: llvm/trunk/test/CodeGen/AMDGPU/twoaddr-mad.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/twoaddr-mad.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/twoaddr-mad.mir
@@ -0,0 +1,110 @@
+# RUN: llc -march=amdgcn %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: test_madmk_reg_imm_f32
+# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit %exec
+---
+name: test_madmk_reg_imm_f32
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+body: |
+  bb.0:
+
+    %0 = IMPLICIT_DEF
+    %1 = COPY %0.sub1
+    %2 = V_MOV_B32_e32 1078523331, implicit %exec
+    %3 = V_MAC_F32_e32 killed %0.sub0, %2, killed %1, implicit %exec
+
+...
+ +# GCN-LABEL: name: test_madmk_imm_reg_f32 +# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit %exec +--- +name: test_madmk_imm_reg_f32 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit %exec + %3 = V_MAC_F32_e32 %2, killed %0.sub0, killed %1, implicit %exec + +... + +# GCN-LABEL: name: test_madak_f32 +# GCN: V_MADAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit %exec +--- +name: test_madak_f32 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = V_MOV_B32_e32 1078523331, implicit %exec + %2 = V_MAC_F32_e32 killed %0.sub0, %0.sub1, %1, implicit %exec + +... + +# GCN-LABEL: name: test_madmk_reg_imm_f16 +# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit %exec +--- +name: test_madmk_reg_imm_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit %exec + %3 = V_MAC_F16_e32 killed %0.sub0, %2, killed %1, implicit %exec + +... + +# GCN-LABEL: name: test_madmk_imm_reg_f16 +# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit %exec +--- +name: test_madmk_imm_reg_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit %exec + %3 = V_MAC_F16_e32 %2, killed %0.sub0, killed %1, implicit %exec + +... 
+ +# GCN-LABEL: name: test_madak_f16 +# GCN: V_MADAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit %exec +--- +name: test_madak_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = V_MOV_B32_e32 1078523331, implicit %exec + %2 = V_MAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit %exec +... Index: llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -23,9 +23,9 @@ } ; GCN-LABEL: {{^}}madak_f16_use_2 -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_madak_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x41200000 ; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_madak_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x4900 ; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: s_endpgm define amdgpu_kernel void @madak_f16_use_2(