Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -49,6 +49,13 @@
 def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
 
+// 32-bit VALU immediate operand that uses the constant bus.
+def u32kimm : Operand<i32> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_KIMM32";
+  let PrintMethod = "printU32ImmOperand";
+}
+
 let OperandType = "OPERAND_IMMEDIATE" in {
 def u32imm : Operand<i32> {
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -50,7 +50,12 @@
   /// Operand with register or 32-bit immediate
   OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET,
   /// Operand with register or inline constant
-  OPERAND_REG_INLINE_C
+  OPERAND_REG_INLINE_C,
+
+  /// Operand with 32-bit immediate that uses the constant bus. The standard
+  /// OPERAND_IMMEDIATE should be used for special immediates such as source
+  /// modifiers.
+  OPERAND_KIMM32
 };
 }
 }
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1695,6 +1695,7 @@
     }
     break;
   case MCOI::OPERAND_IMMEDIATE:
+  case AMDGPU::OPERAND_KIMM32:
     // Check if this operand is an immediate.
     // FrameIndex operands will be replaced by immediates, so they are
     // allowed.
@@ -1731,6 +1732,10 @@
     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
 
     unsigned ConstantBusCount = 0;
+
+    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
+      ++ConstantBusCount;
+
     unsigned SGPRUsed = findImplicitSGPRRead(MI);
     if (SGPRUsed != AMDGPU::NoRegister)
       ++ConstantBusCount;
@@ -2020,9 +2025,12 @@
     if (i == OpIdx)
       continue;
     const MachineOperand &Op = MI.getOperand(i);
-    if (Op.isReg() &&
-        (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
-        usesConstantBus(MRI, Op, getOpSize(MI, i))) {
+    if (Op.isReg()) {
+      if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
+          usesConstantBus(MRI, Op, getOpSize(MI, i))) {
+        return false;
+      }
+    } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
       return false;
     }
   }
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -1576,12 +1576,12 @@
 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
 
 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
-  field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm);
+  field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32kimm:$imm);
   field string Asm32 = "$vdst, $src0, $src1, $imm";
   field bit HasExt = 0;
 }
 
 def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
-  field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1);
+  field dag Ins32 = (ins VCSrc_32:$src0, u32kimm:$imm, VGPR_32:$src1);
   field string Asm32 = "$vdst, $src0, $imm, $src1";
   field bit HasExt = 0;
 }
Index: test/CodeGen/AMDGPU/madak.ll
===================================================================
--- test/CodeGen/AMDGPU/madak.ll
+++ test/CodeGen/AMDGPU/madak.ll
@@ -191,3 +191,32 @@
   store float %madak, float addrspace(1)* %out.gep, align 4
   ret void
 }
+
+; SIFoldOperands should not fold the SGPR copy into the instruction
+; because the implicit immediate already uses the constant bus.
+; GCN-LABEL: {{^}}madak_constant_bus_violation:
+; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
+; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+; GCN: buffer_load_dword [[VGPR:v[0-9]+]]
+; GCN: v_madak_f32_e32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VGPR]], [[MADAK]]
+; GCN: buffer_store_dword [[MUL]]
+define void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
+bb:
+  %tmp = icmp eq i32 %arg1, 0
+  br i1 %tmp, label %bb3, label %bb4
+
+bb3:
+  store volatile float 0.0, float addrspace(1)* undef
+  br label %bb4
+
+bb4:
+  %vgpr = load volatile float, float addrspace(1)* undef
+  %tmp0 = fmul float %sgpr0, 0.5
+  %tmp1 = fadd float %tmp0, 42.0
+  %tmp2 = fmul float %tmp1, %vgpr
+  store volatile float %tmp2, float addrspace(1)* undef, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
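
For reviewers unfamiliar with the restriction being enforced: on SI/VI a VALU
instruction may read the constant bus at most once per instruction, SGPR
sources and 32-bit literal constants both count as reads, and the trailing
literal of v_madak_f32/v_madmk_f32 (the new kimm operand) is exactly such a
read. Below is a minimal, self-contained sketch of that counting rule. The
SrcKind enum and countConstantBusUses helper are hypothetical stand-ins for
illustration only, not the real MachineOperand/SIInstrInfo machinery, and the
sketch ignores the subtlety that repeated reads of the same SGPR count once
(the SGPRUsed logic in the diff above).

// Simplified model of the VALU constant bus rule this patch enforces
// (hypothetical types; the real check lives in SIInstrInfo).
#include <cassert>
#include <initializer_list>

enum class SrcKind { VGPR, SGPR, InlineConst, Kimm32 };

// Inline constants are encoded directly in the instruction and are free.
// Each SGPR read and each 32-bit literal (kimm) consumes the single
// constant bus read available to a VALU instruction on SI/VI.
static unsigned countConstantBusUses(std::initializer_list<SrcKind> Srcs) {
  unsigned Count = 0;
  for (SrcKind S : Srcs)
    if (S == SrcKind::SGPR || S == SrcKind::Kimm32)
      ++Count;
  return Count;
}

int main() {
  // v_madak_f32 v0, s4, v1, 0x42280000 would need two bus reads
  // (the SGPR and the literal), so SIFoldOperands must not form it.
  assert(countConstantBusUses({SrcKind::SGPR, SrcKind::VGPR,
                               SrcKind::Kimm32}) > 1);
  // With the SGPR copied to a VGPR first, only the literal uses the bus.
  assert(countConstantBusUses({SrcKind::VGPR, SrcKind::VGPR,
                               SrcKind::Kimm32}) == 1);
}

This is the shape the new test checks for: the v_mov_b32_e32 copy into
[[SGPR0_VCOPY]] must survive so that v_madak_f32's only constant bus use is
its literal.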