Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -161,6 +161,12 @@
   "Has s_memrealtime instruction"
 >;
 
+def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
+  "HasInv2PiInlineImm", // bool member declared in AMDGPUSubtarget.h
+  "true",
+  "Has 1 / (2 * pi) as inline immediate"
+>;
+
 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
   "Has16BitInsts",
   "true",
@@ -307,7 +313,7 @@
   [FeatureFP64, FeatureLocalMemorySize65536,
    FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
-   FeatureSMemRealTime
+   FeatureInv2PiInlineImm, FeatureSMemRealTime
   ]
 >;
 
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -98,6 +98,7 @@
   bool SGPRInitBug;
   bool HasSMemRealTime;
   bool Has16BitInsts;
+  bool HasInv2PiInlineImm;
   bool FlatAddressSpace;
   bool R600ALUInst;
   bool CaymanISA;
@@ -504,6 +505,10 @@
     return getGeneration() >= VOLCANIC_ISLANDS;
   }
 
+  bool hasInv2PiInlineImm() const {
+    return HasInv2PiInlineImm; // NOTE(review): confirm ctor init to false.
+  }
+
   bool enableSIScheduler() const {
     return EnableSIScheduler;
   }
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -80,8 +80,10 @@
   void printRegOperand(unsigned RegNo, raw_ostream &O);
   void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                    raw_ostream &O);
-  void printImmediate32(uint32_t Imm, raw_ostream &O);
-  void printImmediate64(uint64_t Imm, raw_ostream &O);
+  void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
   void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O);
   void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo,
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -315,7 +315,9 @@
   printOperand(MI, OpNo, STI, O);
 }
 
-void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
   int32_t SImm = static_cast<int32_t>(Imm);
   if (SImm >= -16 && SImm <= 64) {
     O << SImm;
@@ -340,11 +342,16 @@
     O << "4.0";
   else if (Imm == FloatToBits(-4.0f))
     O << "-4.0";
+  else if (Imm == 0x3e22f983 &&
+           STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+    O << "1/2pi";
   else
     O << formatHex(static_cast<uint64_t>(Imm));
 }
 
-void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
   int64_t SImm = static_cast<int64_t>(Imm);
   if (SImm >= -16 && SImm <= 64) {
     O << SImm;
@@ -369,6 +376,9 @@
     O << "4.0";
   else if (Imm == DoubleToBits(-4.0))
     O << "-4.0";
+  else if (Imm == 0x3fc45f306dc9c882 &&
+           STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+    O << "1/2pi";
   else {
     assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
 
@@ -404,13 +414,13 @@
     if (RCID != -1) {
       const MCRegisterClass &ImmRC = MRI.getRegClass(RCID);
       if (ImmRC.getSize() == 4)
-        printImmediate32(Op.getImm(), O);
+        printImmediate32(Op.getImm(), STI, O);
       else if (ImmRC.getSize() == 8)
-        printImmediate64(Op.getImm(), O);
+        printImmediate64(Op.getImm(), STI, O);
       else
         llvm_unreachable("Invalid register class size");
     } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
-      printImmediate32(Op.getImm(), O);
+      printImmediate32(Op.getImm(), STI, O);
     } else {
       // We hit this for the immediate instruction bits that don't yet have a
       // custom printer.
@@ -426,9 +436,9 @@
       const MCRegisterClass &ImmRC = MRI.getRegClass(Desc.OpInfo[OpNo].RegClass);
 
       if (ImmRC.getSize() == 4)
-        printImmediate32(FloatToBits(Op.getFPImm()), O);
+        printImmediate32(FloatToBits(Op.getFPImm()), STI, O);
       else if (ImmRC.getSize() == 8)
-        printImmediate64(DoubleToBits(Op.getFPImm()), O);
+        printImmediate64(DoubleToBits(Op.getFPImm()), STI, O);
       else
         llvm_unreachable("Invalid register class size");
     }
Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -117,7 +117,8 @@
   if (Val == FloatToBits(-4.0f))
     return 247;
 
-  if (AMDGPU::isVI(STI) && Val == 0x3e22f983) // 1/(2*pi)
+  if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
+      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
     return 248;
 
   return 255;
@@ -152,7 +153,8 @@
   if (Val == DoubleToBits(-4.0))
     return 247;
 
-  if (AMDGPU::isVI(STI) && Val == 0x3fc45f306dc9c882) // 1/(2*pi)
+  if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
+      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
     return 248;
 
   return 255;
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1503,7 +1503,8 @@
            (DoubleToBits(2.0) == Val) ||
            (DoubleToBits(-2.0) == Val) ||
            (DoubleToBits(4.0) == Val) ||
-           (DoubleToBits(-4.0) == Val);
+           (DoubleToBits(-4.0) == Val) ||
+           (ST.hasInv2PiInlineImm() && Val == 0x3fc45f306dc9c882);
   }
 
   // The actual type of the operand does not seem to matter as long
@@ -1524,7 +1525,8 @@
          (FloatToBits(2.0f) == Val) ||
          (FloatToBits(-2.0f) == Val) ||
          (FloatToBits(4.0f) == Val) ||
-         (FloatToBits(-4.0f) == Val);
+         (FloatToBits(-4.0f) == Val) ||
+         (ST.hasInv2PiInlineImm() && Val == 0x3e22f983);
 }
 
 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
Index: test/CodeGen/AMDGPU/imm.ll
===================================================================
--- test/CodeGen/AMDGPU/imm.ll
+++ test/CodeGen/AMDGPU/imm.ll
@@ -118,6 +118,24 @@
   ret void
 }
 
+
+; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f32:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e22f983{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 1/2pi{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) {
+  store float 0x3FC45F3060000000, float addrspace(1)* %out ; 1/(2*pi) as f32, bits 0x3e22f983
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
+  store float 0xBFC45F3060000000, float addrspace(1)* %out ; -1/(2*pi) as f32: must stay a literal on every target
+  ret void
+}
+
 ; GCN-LABEL: {{^}}store_literal_imm_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
 ; GCN: buffer_store_dword [[REG]]
@@ -418,6 +436,30 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}add_inline_imm_inv_2pi_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; SI-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
+; SI: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1/2pi
+; VI: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0x3fc45f306dc9c882 ; 1/(2*pi) as f64
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}add_m_inv_2pi_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
+; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @add_m_inv_2pi_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0xbfc45f306dc9c882 ; -1/(2*pi) as f64: checks expect a materialized literal
+  store double %y, double addrspace(1)* %out
+  ret void
+}
 
 ; GCN-LABEL: {{^}}add_inline_imm_1_f64:
 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
@@ -599,6 +641,24 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}store_inv_2pi_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inv_2pi_f64(double addrspace(1)* %out) {
+  store double 0x3fc45f306dc9c882, double addrspace(1)* %out ; 1/(2*pi) as f64: checks expect two 32-bit moves
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) {
+  store double 0xbfc45f306dc9c882, double addrspace(1)* %out ; -1/(2*pi) as f64: checks expect two 32-bit moves
+  ret void
+}
+
 ; GCN-LABEL: {{^}}store_literal_imm_f64:
 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000