Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -161,6 +161,12 @@ "Has s_memrealtime instruction" >; +def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm", + "HasInv2PiInlineImm", + "true", + "Has 1 / (2 * pi) as inline immediate" +>; + def Feature16BitInsts : SubtargetFeature<"16-bit-insts", "Has16BitInsts", "true", @@ -307,7 +313,7 @@ [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, - FeatureSMemRealTime + FeatureInv2PiInlineImm, FeatureSMemRealTime ] >; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -98,6 +98,7 @@ bool SGPRInitBug; bool HasSMemRealTime; bool Has16BitInsts; + bool HasInv2PiInlineImm; bool FlatAddressSpace; bool R600ALUInst; bool CaymanISA; @@ -504,6 +505,10 @@ return getGeneration() >= VOLCANIC_ISLANDS; } + bool hasInv2PiInlineImm() const { + return HasInv2PiInlineImm; + } + bool enableSIScheduler() const { return EnableSIScheduler; } Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -80,8 +80,10 @@ void printRegOperand(unsigned RegNo, raw_ostream &O); void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printImmediate32(uint32_t Imm, raw_ostream &O); - void printImmediate64(uint64_t Imm, raw_ostream &O); + void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); + void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, const 
MCSubtargetInfo &STI, raw_ostream &O); void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo, Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -315,7 +315,9 @@ printOperand(MI, OpNo, STI, O); } -void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) { +void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { int32_t SImm = static_cast<int32_t>(Imm); if (SImm >= -16 && SImm <= 64) { O << SImm; @@ -340,11 +342,16 @@ O << "4.0"; else if (Imm == FloatToBits(-4.0f)) O << "-4.0"; + else if (Imm == 0x3e22f983 && + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) + O << "1/2pi"; else O << formatHex(static_cast<uint64_t>(Imm)); } -void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) { +void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { int64_t SImm = static_cast<int64_t>(Imm); if (SImm >= -16 && SImm <= 64) { O << SImm; @@ -369,6 +376,9 @@ O << "4.0"; else if (Imm == DoubleToBits(-4.0)) O << "-4.0"; + else if (Imm == 0x3fc45f306dc9c882 && + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) + O << "1/2pi"; else { assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882); @@ -404,13 +414,13 @@ if (RCID != -1) { const MCRegisterClass &ImmRC = MRI.getRegClass(RCID); if (ImmRC.getSize() == 4) - printImmediate32(Op.getImm(), O); + printImmediate32(Op.getImm(), STI, O); else if (ImmRC.getSize() == 8) - printImmediate64(Op.getImm(), O); + printImmediate64(Op.getImm(), STI, O); else llvm_unreachable("Invalid register class size"); } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) { - printImmediate32(Op.getImm(), O); + printImmediate32(Op.getImm(), STI, O); } else { // We hit this for the immediate instruction bits that don't yet have a // custom printer. 
@@ -426,9 +436,9 @@ const MCRegisterClass &ImmRC = MRI.getRegClass(Desc.OpInfo[OpNo].RegClass); if (ImmRC.getSize() == 4) - printImmediate32(FloatToBits(Op.getFPImm()), O); + printImmediate32(FloatToBits(Op.getFPImm()), STI, O); else if (ImmRC.getSize() == 8) - printImmediate64(DoubleToBits(Op.getFPImm()), O); + printImmediate64(DoubleToBits(Op.getFPImm()), STI, O); else llvm_unreachable("Invalid register class size"); } Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -117,7 +117,8 @@ if (Val == FloatToBits(-4.0f)) return 247; - if (AMDGPU::isVI(STI) && Val == 0x3e22f983) // 1/(2*pi) + if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi) + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) return 248; return 255; @@ -152,7 +153,8 @@ if (Val == DoubleToBits(-4.0)) return 247; - if (AMDGPU::isVI(STI) && Val == 0x3fc45f306dc9c882) // 1/(2*pi) + if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi) + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) return 248; return 255; Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1503,7 +1503,8 @@ (DoubleToBits(2.0) == Val) || (DoubleToBits(-2.0) == Val) || (DoubleToBits(4.0) == Val) || - (DoubleToBits(-4.0) == Val); + (DoubleToBits(-4.0) == Val) || + (ST.hasInv2PiInlineImm() && Val == 0x3fc45f306dc9c882); } // The actual type of the operand does not seem to matter as long @@ -1524,7 +1525,8 @@ (FloatToBits(2.0f) == Val) || (FloatToBits(-2.0f) == Val) || (FloatToBits(4.0f) == Val) || - (FloatToBits(-4.0f) == Val); + (FloatToBits(-4.0f) == Val) || + (ST.hasInv2PiInlineImm() && Val == 0x3e22f983); } bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, Index: test/CodeGen/AMDGPU/imm.ll 
=================================================================== --- test/CodeGen/AMDGPU/imm.ll +++ test/CodeGen/AMDGPU/imm.ll @@ -118,6 +118,24 @@ ret void } + +; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f32: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e22f983{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 1/2pi{{$}} +; GCN: buffer_store_dword [[REG]] +define void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) { + store float 0x3FC45F3060000000, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}} +; GCN: buffer_store_dword [[REG]] +define void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) { + store float 0xBFC45F3060000000, float addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}store_literal_imm_f32: ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000 ; GCN: buffer_store_dword [[REG]] @@ -418,6 +436,30 @@ ret void } +; GCN-LABEL: {{^}}add_inline_imm_inv_2pi_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; SI-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30 +; SI: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} + +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1/2pi +; VI: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x3fc45f306dc9c882 + store double %y, double addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_m_inv_2pi_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30 +; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @add_m_inv_2pi_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 
0xbfc45f306dc9c882 + store double %y, double addrspace(1)* %out + ret void +} ; GCN-LABEL: {{^}}add_inline_imm_1_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb @@ -599,6 +641,24 @@ ret void } +; GCN-LABEL: {{^}}store_inv_2pi_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30 +; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inv_2pi_f64(double addrspace(1)* %out) { + store double 0x3fc45f306dc9c882, double addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30 +; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) { + store double 0xbfc45f306dc9c882, double addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}store_literal_imm_f64: ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000