diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3906,6 +3906,7 @@ } case AMDGPU::OPERAND_KIMM32: case AMDGPU::OPERAND_KIMM16: + return false; case AMDGPU::OPERAND_INPUT_MODS: case MCOI::OPERAND_IMMEDIATE: // Always embedded in the instruction for free. @@ -5264,9 +5265,8 @@ return false; SGPRsUsed.insert(SGPR); } - } else if (InstDesc.operands()[i].OperandType == AMDGPU::OPERAND_KIMM32 || - (AMDGPU::isSISrcOperand(InstDesc, i) && - !isInlineConstant(Op, InstDesc.operands()[i]))) { + } else if (AMDGPU::isSISrcOperand(InstDesc, i) && + !isInlineConstant(Op, InstDesc.operands()[i])) { if (!LiteralLimit--) return false; if (--ConstantBusLimit <= 0) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -931,7 +931,7 @@ } // End isCommutable = 1 } // End SubtargetPredicate = isGFX11Plus -let FPDPRounding = 1, isReMaterializable = 1 in { +let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in { let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in { def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; } @@ -947,7 +947,7 @@ def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">; } } // End isCommutable = 1 -} // End FPDPRounding = 1, isReMaterializable = 1 +} // End FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 let Constraints = "$vdst = $src2", DisableEncoding="$src2", @@ -1089,12 +1089,12 @@ } } // End AddedComplexity = 30 -let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in { +let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in { def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">; let isCommutable = 1 in def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; -} +} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 let SubtargetPredicate = HasPkFmacF16Inst in { defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll --- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll +++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll @@ -149,6 +149,7 @@ ; GFX9: codeLenInByte = 24 ; GFX10: codeLenInByte = 20 +; GFX11: codeLenInByte = 20 define float @v_mul_f32_vop2_frame_index(float %x) { ; GFX9-LABEL: v_mul_f32_vop2_frame_index: diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir b/llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir @@ -0,0 +1,85 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s + +# Do not fold noninlinable constants in instructions like fmaak and fmamk next +# to kimm16 and kimm32 operands + +--- +name: fmaak_f16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: fmaak_f16 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec + ; GCN-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAAK_F16 [[COPY]], [[V_MOV_B32_e32_]], 11878, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_FMAAK_F16_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec + %2:vgpr_32 = contract nofpexcept V_FMAAK_F16 %0, %1, 11878, implicit $mode, implicit $exec + $vgpr0 = COPY %2 +... + +--- +name: fmamk_f16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: fmamk_f16 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec + ; GCN-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAMK_F16 [[COPY]], 11878, [[V_MOV_B32_e32_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_FMAMK_F16_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec + %2:vgpr_32 = contract nofpexcept V_FMAMK_F16 %0, 11878, %1, implicit $mode, implicit $exec + $vgpr0 = COPY %2 +... + +--- +name: fmaak_f32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: fmaak_f32 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec + ; GCN-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAAK_F32 [[COPY]], [[V_MOV_B32_e32_]], 778462824, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_FMAAK_F32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec + %2:vgpr_32 = contract nofpexcept V_FMAAK_F32 %0, %1, 778462824, implicit $mode, implicit $exec + $vgpr0 = COPY %2 +... + +--- +name: fmamk_f32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: fmamk_f32 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec + ; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAMK_F32 [[COPY]], 778462824, [[V_MOV_B32_e32_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_FMAMK_F32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec + %2:vgpr_32 = contract nofpexcept V_FMAMK_F32 %0, 778462824, %1, implicit $mode, implicit $exec + $vgpr0 = COPY %2 +...