Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -4050,7 +4050,7 @@
 - ``r``: A 32 or 64-bit integer register.
 - ``[0-9]v``: The 32-bit VGPR register, number 0-9.
 - ``[0-9]s``: The 32-bit SGPR register, number 0-9.
-
+- ``A``: An integer or floating-point inline constant of 16, 32 or 64 bits.
 
 All ARM modes:
 
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1311,7 +1311,15 @@
     AMDGPUInstPrinter::printRegOperand(MO.getReg(), O,
                                        *MF->getSubtarget().getRegisterInfo());
     return false;
+  } else if (MO.isImm()) {
+    int64_t Val = MO.getImm();
+    if (isInlineAsmConst(Val)) {
+      printInlineAsmConst(Val, O);
+      return false;
+    } else { // A literal
+      O << Val;
+      return false;
+    }
   }
-
   return true;
 }
Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
===================================================================
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -270,6 +270,12 @@
   void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 };
 
+namespace AMDGPU {
+/// Print the inline constant \p Imm (given in its 64-bit form) to \p O.
+void printInlineAsmConst(uint64_t Imm, raw_ostream &O);
+
+} // End namespace AMDGPU
+
 } // End namespace llvm
 
 #endif
Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -439,13 +439,11 @@
     O << formatHex(static_cast<uint64_t>(Imm));
 }
 
-void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
+static bool printInlineConst64(uint64_t Imm, raw_ostream &O, bool HasInv2Pi) {
   int64_t SImm = static_cast<int64_t>(Imm);
   if (SImm >= -16 && SImm <= 64) {
     O << SImm;
-    return;
+    return true;
   }
 
   if (Imm == DoubleToBits(0.0))
@@ -466,10 +464,19 @@
     O << "4.0";
   else if (Imm == DoubleToBits(-4.0))
     O << "-4.0";
-  else if (Imm == 0x3fc45f306dc9c882 &&
-           STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+  else if (Imm == 0x3fc45f306dc9c882 && HasInv2Pi)
     O << "0.15915494309189532";
   else {
+    return false;
+  }
+  return true;
+}
+
+void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  bool HasInv2Pi = STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
+  if (!printInlineConst64(Imm, O, HasInv2Pi)) {
     assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
 
     // In rare situations, we will have a 32-bit literal in a 64-bit
@@ -478,6 +485,16 @@
   }
 }
 
+/// Print the inline constant \p Imm, given in its 64-bit form, to \p O.
+///
+/// inv2pi is always accepted here, independent of the subtarget. The return
+/// value of the helper (whether anything was printed) is deliberately
+/// dropped; the visible caller checks isInlineAsmConst() beforehand.
+void llvm::AMDGPU::printInlineAsmConst(uint64_t Imm, raw_ostream &O) {
+  const bool Printed = printInlineConst64(Imm, O, /*HasInv2Pi=*/true);
+  (void)Printed;
+}
+
 void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
                                   const MCSubtargetInfo &STI,
                                   raw_ostream &O) {
Index: llvm/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -383,6 +383,10 @@
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                StringRef Constraint, MVT VT) const override;
   ConstraintType getConstraintType(StringRef Constraint) const override;
+  void LowerAsmOperandForConstraint(SDValue Op,
+                                    std::string &Constraint,
+                                    std::vector<SDValue> &Ops,
+                                    SelectionDAG &DAG) const override;
   SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
                    SDValue V) const;
 
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10707,11 +10707,47 @@
     case 'v':
     case 'a':
       return C_RegisterClass;
+    case 'A':
+      return C_Other;
     }
   }
   return TargetLowering::getConstraintType(Constraint);
 }
 
+void SITargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                    std::string &Constraint,
+                                                    std::vector<SDValue> &Ops,
+                                                    SelectionDAG &DAG) const {
+  // Every constraint other than 'A' is handled by the generic lowering.
+  if (Constraint != "A") {
+    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+    return;
+  }
+
+  // Returning without appending to Ops rejects the operand for 'A'.
+  const unsigned Bits = Op.getValueSizeInBits();
+  if (Bits > 64)
+    return;
+
+  uint64_t Raw;
+  if (auto *IntNode = dyn_cast<ConstantSDNode>(Op))
+    Raw = IntNode->getSExtValue();
+  else if (auto *FPNode = dyn_cast<ConstantFPSDNode>(Op))
+    Raw = FPNode->getValueAPF().bitcastToAPInt().getSExtValue();
+  else
+    return; // Not a constant.
+
+  const bool Inv2Pi = Subtarget->hasInv2PiInlineImm();
+  const bool Inlinable =
+      (Bits == 16 && AMDGPU::isInlinableLiteral16(Raw, Inv2Pi)) ||
+      (Bits == 32 && AMDGPU::isInlinableLiteral32(Raw, Inv2Pi)) ||
+      (Bits == 64 && AMDGPU::isInlinableLiteral64(Raw, Inv2Pi));
+  if (!Inlinable)
+    return;
+  const uint64_t Canon = AMDGPU::createInlineAsmConst(Raw, Bits);
+  Ops.push_back(DAG.getTargetConstant(Canon, SDLoc(Op), MVT::i64));
+}
+
 // Figure out which registers should be reserved for stack access. Only after
 // the function is legalized do we know all of the non-spill stack objects or if
 // calls are present.
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -769,6 +769,12 @@
   }
 };
 
+/// Widen inlinable \p Val of \p Size bits to its 64-bit inline-asm form.
+LLVM_READNONE uint64_t createInlineAsmConst(uint64_t Val, unsigned Size);
+
+/// \returns true if \p Val is a 64-bit inline constant (inv2pi included).
+LLVM_READNONE bool isInlineAsmConst(uint64_t Val);
+
 } // end namespace AMDGPU
 } // end namespace llvm
 
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -13,6 +13,7 @@
 #include "SIDefines.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/IR/Attributes.h"
@@ -1141,8 +1142,12 @@
   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
 }
 
+static bool isInlinableIntLiteral(int64_t Literal) {
+  return -16 <= Literal && Literal <= 64; // Inline integer range [-16, 64].
+}
+
 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;
 
   uint64_t Val = static_cast<uint64_t>(Literal);
@@ -1159,7 +1164,7 @@
 }
 
 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;
 
   // The actual type of the operand does not seem to matter as long
@@ -1188,7 +1193,7 @@
   if (!HasInv2Pi)
     return false;
 
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;
 
   uint16_t Val = static_cast<uint16_t>(Literal);
@@ -1410,5 +1415,39 @@
                       : getGfx9BufferFormatInfo(Format);
 }
 
+// Canonicalize an inlinable constant for use as an inline asm operand.
+// Val holds the Size-bit (16, 32 or 64) value; the visible caller passes it
+// sign-extended to 64 bits. Inline integers [-16, 64] are returned as is and
+// fp constants are widened to f64, which has the following advantages:
+// 1) When printed, these constants are valid for any operand type.
+// 2) Encoding of f64 inline constants is different from any 32-bit literal.
+//    This may be useful for future extensions.
+uint64_t createInlineAsmConst(uint64_t Val, unsigned Size) {
+  assert(Size == 16 || Size == 32 || Size == 64);
+
+  if (Size == 64 || isInlinableIntLiteral(Val))
+    return Val;
+
+  // Convert inv2pi: widening its f16/f32 encoding would not yield the f64
+  // inv2pi bit pattern, so map it explicitly.
+  if ((Size == 16 && Val == 0x3118) ||
+      (Size == 32 && Val == 0x3e22f983))
+    return 0x3fc45f306dc9c882;
+
+  // Convert remaining fp constants. Truncate explicitly: Val may carry
+  // sign-extension bits above Size, which APInt(Size, Val) must not see.
+  const fltSemantics &Sem =
+      (Size == 16) ? APFloat::IEEEhalf() : APFloat::IEEEsingle();
+  APFloat FPLiteral(Sem, APInt(64, Val).trunc(Size));
+  bool LosesInfo; // Widening to f64 is exact; the flag is ignored.
+  FPLiteral.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                    &LosesInfo);
+  return FPLiteral.bitcastToAPInt().getZExtValue();
+}
+
+bool isInlineAsmConst(uint64_t Val) {
+  return isInlinableLiteral64(static_cast<int64_t>(Val), /*HasInv2Pi=*/true);
+}
+
 } // namespace AMDGPU
 } // namespace llvm
Index: llvm/test/CodeGen/AMDGPU/inline-constraints.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-constraints.ll
+++ llvm/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+; RUN: not llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+; RUN: not llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=VI %s
+
+; RUN: not llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs 2>&1 | FileCheck --check-prefix=NOGCN --check-prefix=NOSI %s
+; RUN: not llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs 2>&1 | FileCheck --check-prefix=NOGCN %s
 
 ; GCN-LABEL: {{^}}inline_reg_constraints:
 ; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
@@ -74,3 +77,223 @@
   tail call void asm sideeffect "; use $0", "s"(double 1.0)
   ret void
 }
+
+;==============================================================================
+; 'A' constraint, 16-bit operand
+;==============================================================================
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_H0:
+; VI: v_mov_b32 {{v[0-9]+}}, 64
+define i32 @inline_A_constraint_H0() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 64)
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_H1:
+; VI: v_mov_b32 {{v[0-9]+}}, -16
+define i32 @inline_A_constraint_H1() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 -16)
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_H2:
+; VI: v_mov_b32 {{v[0-9]+}}, 1.0
+define i32 @inline_A_constraint_H2() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 1.0 to i16))
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_H3:
+; VI: v_mov_b32 {{v[0-9]+}}, -1.0
+define i32 @inline_A_constraint_H3() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half -1.0 to i16))
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_H4:
+; VI: v_mov_b32 {{v[0-9]+}}, 0.15915494309189532
+define i32 @inline_A_constraint_H4() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(half 0xH3118)
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_H5:
+; VI: v_mov_b32 {{v[0-9]+}}, 0.15915494309189532
+define i32 @inline_A_constraint_H5() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 0xH3118 to i16))
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_H6:
+; VI: v_mov_b32 {{v[0-9]+}}, -0.5
+define i32 @inline_A_constraint_H6() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(half -0.5)
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_H7() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 0xH3119 to i16))
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_H8() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 0xH3117 to i16))
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_H9() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 65)
+  ret i32 %v0
+}
+
+;==============================================================================
+; 'A' constraint, 32-bit operand
+;==============================================================================
+
+; GCN-LABEL: {{^}}inline_A_constraint_F0:
+; GCN: v_mov_b32 {{v[0-9]+}}, -16
+define i32 @inline_A_constraint_F0() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 -16)
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_F1:
+; GCN: v_mov_b32 {{v[0-9]+}}, 1
+define i32 @inline_A_constraint_F1() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 1)
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_F2:
+; GCN: v_mov_b32 {{v[0-9]+}}, -0.5
+define i32 @inline_A_constraint_F2() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 bitcast (float -0.5 to i32))
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_F3:
+; GCN: v_mov_b32 {{v[0-9]+}}, 2.0
+define i32 @inline_A_constraint_F3() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 bitcast (float 2.0 to i32))
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_F4:
+; GCN: v_mov_b32 {{v[0-9]+}}, -4.0
+define i32 @inline_A_constraint_F4() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(float -4.0)
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_F5:
+; VI: v_mov_b32 {{v[0-9]+}}, 0.15915494309189532
+define i32 @inline_A_constraint_F5() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 1042479491)
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_F6:
+; GCN: v_mov_b32 {{v[0-9]+}}, 0.5
+define i32 @inline_A_constraint_F6() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(float 0.5)
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_F7() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 1042479490)
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_F8() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 -17)
+  ret i32 %v0
+}
+
+;==============================================================================
+; 'A' constraint, 64-bit operand
+;==============================================================================
+
+; GCN-LABEL: {{^}}inline_A_constraint_D0:
+; GCN: v_mov_b32 {{v[0-9]+}}, -16
+define i32 @inline_A_constraint_D0() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i64 -16)
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_D1:
+; GCN: v_mov_b32 {{v[0-9]+}}, -2.0
+define i32 @inline_A_constraint_D1() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i64 bitcast (double -2.0 to i64))
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_D2:
+; GCN: v_mov_b32 {{v[0-9]+}}, 0.5
+define i32 @inline_A_constraint_D2() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(double 0.5)
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_D3:
+; VI: v_mov_b32 {{v[0-9]+}}, 0.15915494309189532
+define i32 @inline_A_constraint_D3() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(double 0.15915494309189532)
+  ret i32 %v0
+}
+
+; NOSI: error: invalid operand for inline asm constraint 'A'
+; VI-LABEL: {{^}}inline_A_constraint_D4:
+; VI: v_mov_b32 {{v[0-9]+}}, 0.15915494309189532
+define i32 @inline_A_constraint_D4() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i64 bitcast (double 0.15915494309189532 to i64))
+  ret i32 %v0
+}
+
+; GCN-LABEL: {{^}}inline_A_constraint_D5:
+; GCN: v_mov_b32 {{v[0-9]+}}, -2.0
+define i32 @inline_A_constraint_D5() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(double -2.0)
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_D8() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(double 1.1)
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_D9() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i64 bitcast (double 0.1 to i64))
+  ret i32 %v0
+}
+
+;==============================================================================
+; 'A' constraint, type errors
+;==============================================================================
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_E1(i32 %x) {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 %x)
+  ret i32 %v0
+}
+
+; NOGCN: error: invalid operand for inline asm constraint 'A'
+define i32 @inline_A_constraint_E2() {
+  %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i128 100000000000000000000)
+  ret i32 %v0
+}