Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3837,11 +3837,13 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { assert(!MO.isReg() && "isInlineConstant called on register operand!"); - if (!MO.isImm() || - OperandType < AMDGPU::OPERAND_SRC_FIRST || - OperandType > AMDGPU::OPERAND_SRC_LAST) + if (!MO.isImm()) return false; + if (OperandType < AMDGPU::OPERAND_SRC_FIRST || + OperandType > AMDGPU::OPERAND_SRC_LAST) + return true; + // MachineOperand provides no way to tell the true operand size, since it only // records a 64-bit value. We need to know the size to determine if a 32-bit // floating point immediate bit pattern is legal for an integer immediate. It Index: llvm/test/CodeGen/AMDGPU/code-size-estimate.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/code-size-estimate.ll @@ -0,0 +1,136 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX10 %s + +declare float @llvm.fabs.f32(float) + +define float @v_mul_f32_vop2(float %x, float %y) { +; GFX9-LABEL: v_mul_f32_vop2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x0a] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f32_vop2: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] + %mul = fmul float %x, %y + ret float %mul +} +; CHECK: codeLenInByte = 12 + +define float @v_mul_f32_vop2_inline_imm(float %x) { +; GFX9-LABEL: v_mul_f32_vop2_inline_imm: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x0a] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f32_vop2_inline_imm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] + %mul = fmul float %x, 4.0 + ret float %mul +} +; CHECK: codeLenInByte = 12 + +define float @v_mul_f32_vop2_literal(float %x) { +; GFX9-LABEL: v_mul_f32_vop2_literal: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x0a,0x00,0x00,0xf6,0x42] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f32_vop2_literal: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] + %mul = fmul float %x, 123.0 + ret float %mul +} +; CHECK: codeLenInByte = 16 + +define float @v_mul_f32_vop3_src_mods(float %x, float %y) { +; GFX9-LABEL: v_mul_f32_vop3_src_mods: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x03,0x02,0x00] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f32_vop3_src_mods: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] + %fabs.x = call float @llvm.fabs.f32(float %x) + %mul = fmul float %fabs.x, %y + ret float %mul +} +; CHECK: codeLenInByte = 16 + +define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) { +; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0xed,0x01,0x00] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f32_vop3_src_mods_inline_imm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] + %fabs.x = call float @llvm.fabs.f32(float %x) + %mul = fmul float %fabs.x, 4.0 + ret float %mul +} + +; CHECK: codeLenInByte = 16 + +define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) { +; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: s_mov_b32 s4, 0x42f60000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0xf6,0x42] +; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, s4 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x09,0x00,0x00] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f32_vop3_src_mods_literal: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] + %fabs.x = call float @llvm.fabs.f32(float %x) + %mul = fmul float %fabs.x, 123.0 + ret float %mul +} + +; GFX9: codeLenInByte = 24 +; GFX10: codeLenInByte = 20 + +define float @v_mul_f32_vop2_frame_index(float %x) { +; GFX9-LABEL: v_mul_f32_vop2_frame_index: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s32 ; encoding: [0x01,0x00,0x10,0xd1,0x86,0x40,0x00,0x00] +; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x0a] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] +; +; GFX10-LABEL: v_mul_f32_vop2_frame_index: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; encoding: [0x01,0x00,0x16,0xd5,0x85,0x40,0x00,0x00] +; GFX10-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x10] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] + %alloca = alloca i32, addrspace(5) + %ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32 + %cast = bitcast i32 %ptrtoint to float + %mul = fmul float %x, %cast + ret float %mul +} + +; CHECK: codeLenInByte = 20 Index: llvm/test/CodeGen/AMDGPU/idiv-licm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/idiv-licm.ll +++ llvm/test/CodeGen/AMDGPU/idiv-licm.ll @@ -848,7 +848,6 @@ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s1 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v2 -; GFX11-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB5_1: ; %bb3 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -875,7 +874,6 @@ ; GFX11-NEXT: global_store_b16 v[5:6], v0, off ; GFX11-NEXT: s_cbranch_vccz .LBB5_1 ; GFX11-NEXT: ; %bb.2: ; %bb2 -; GFX11-NEXT: s_set_inst_prefetch_distance 0x2 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm @@ -1135,7 +1133,6 @@ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s4 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0 -; GFX11-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB7_1: ; %bb3 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1170,7 +1167,6 @@ ; GFX11-NEXT: global_store_b16 v2, v3, s[6:7] ; GFX11-NEXT: s_cbranch_vccz .LBB7_1 ; GFX11-NEXT: ; %bb.2: ; %bb2 -; GFX11-NEXT: s_set_inst_prefetch_distance 0x2 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm