Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -449,7 +449,7 @@
              STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
     O << "0.15915494309189532";
   else {
-    assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
+    assert(isInt<32>(Imm) || isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
 
     // In rare situations, we will have a 32-bit literal in a 64-bit
     // operand. This is technically allowed for the encoding of s_mov_b64.
Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -105,6 +105,7 @@
 
   std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
   bool tryFoldOMod(MachineInstr &MI);
+  bool tryFoldRegSequence(MachineInstr &MI);
 
 public:
   SIFoldOperands() : MachineFunctionPass(ID) {
@@ -1463,6 +1464,35 @@
   return true;
 }
 
+// Try to fold a 64-bit immediate reg_sequence into its uses.
+bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
+  assert(MI.isRegSequence());
+  auto Reg = MI.getOperand(0).getReg();
+  SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
+
+  if (Reg.isPhysical())
+    return false;
+
+  if (TII->getOpSize(MI, 0) != 8 ||
+      !getRegSeqInit(Defs, Reg, AMDGPU::OPERAND_REG_IMM_INT32, TII, *MRI))
+    return false;
+
+  assert(Defs.size() == 2);
+  if (!Defs[0].first->isImm() || !Defs[1].first->isImm())
+    return false;
+
+  uint64_t Lit = ((uint64_t)(Defs[0].first->getImm() & 0xffffffff) <<
+                  (Defs[0].second == AMDGPU::sub0 ? 0 : 32)) |
+                 ((uint64_t)(Defs[1].first->getImm() & 0xffffffff) <<
+                  (Defs[1].second == AMDGPU::sub0 ? 0 : 32));
+
+  MI.addOperand(MachineOperand::CreateImm(Lit));
+  foldInstOperand(MI, MI.getOperand(MI.getNumOperands() - 1));
+  MI.RemoveOperand(MI.getNumOperands() - 1);
+
+  return true;
+}
+
 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -1490,6 +1520,9 @@
 
       tryFoldInst(TII, &MI);
 
+      if (MI.isRegSequence() && tryFoldRegSequence(MI))
+        continue;
+
       if (!TII->isFoldableCopy(MI)) {
         // Saw an unknown clobber of m0, so we no longer know what it is.
         if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
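A standalone sketch of the fold above, outside the pass (an assumption: plain
C++ mirroring the Defs[i] logic, not LLVM code). Each 32-bit immediate is
masked and shifted into the half of the 64-bit literal selected by its
subregister index, so the two defs may arrive in either order:

    #include <cstdint>

    // Val0IsSub0/Val1IsSub0 stand in for the Defs[i].second == AMDGPU::sub0
    // checks in tryFoldRegSequence above.
    uint64_t packRegSequenceImm(int64_t Val0, bool Val0IsSub0,
                                int64_t Val1, bool Val1IsSub0) {
      uint64_t Half0 = (uint64_t)(Val0 & 0xffffffff) << (Val0IsSub0 ? 0 : 32);
      uint64_t Half1 = (uint64_t)(Val1 & 0xffffffff) << (Val1IsSub0 ? 0 : 32);
      return Half0 | Half1;
    }

For example, packRegSequenceImm(0xc0800000, true, -1, false) yields
0xffffffffc0800000, the literal the inline-constraints tests below expect.
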
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2850,7 +2850,8 @@
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32: {
     int32_t Trunc = static_cast<int32_t>(Imm);
-    return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
+    return (isInt<32>(Imm) || isUInt<32>(Imm)) &&
+           AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
   }
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -2950,6 +2951,12 @@
   if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
     return false;
 
+  if (MO.isImm()) {
+    int64_t Imm = MO.getImm();
+    if (!isInt<32>(Imm) && !isUInt<32>(Imm))
+      return false;
+  }
+
   if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
     return true;
 
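The new range checks matter because the OPERAND_REG_INLINE_AC_INT32 path
truncates before classifying. A standalone sketch of the failure mode they
close (an assumption: plain C++, with isInlinableLiteral32 reduced to the
small-integer inline range purely for illustration):

    #include <cassert>
    #include <cstdint>

    bool isInlinableAfterTrunc(int64_t Imm) {
      int32_t Trunc = (int32_t)Imm; // drops bits [63:32]
      bool Fits32 = (Imm >= INT32_MIN && Imm <= INT32_MAX) || // isInt<32>
                    (Imm >= 0 && Imm <= (int64_t)UINT32_MAX); // isUInt<32>
      bool Inlinable32 = Trunc >= -16 && Trunc <= 64; // stand-in check
      return Fits32 && Inlinable32;
    }

    int main() {
      assert(isInlinableAfterTrunc(1));            // genuinely inlinable
      assert(!isInlinableAfterTrunc(0x100000000)); // truncates to 0; must
      return 0;                                    // not be reported inline
    }
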
Index: llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -179,10 +179,8 @@
 ; GCN-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_vccz BB4_2
 ; GCN-NEXT:  ; %bb.1:
-; GCN-NEXT:    s_mov_b32 s4, 0
-; GCN-NEXT:    s_mov_b32 s5, s4
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    s_branch BB4_3
 ; GCN-NEXT:  BB4_2: ; %if.else
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
@@ -223,10 +221,8 @@
 ; GCN-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_vccz BB5_2
 ; GCN-NEXT:  ; %bb.1:
-; GCN-NEXT:    s_mov_b32 s4, 0
-; GCN-NEXT:    s_mov_b32 s5, s4
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    s_branch BB5_3
 ; GCN-NEXT:  BB5_2: ; %if.else
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
Index: llvm/test/CodeGen/AMDGPU/inline-constraints.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-constraints.ll
+++ llvm/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -58,8 +58,7 @@
 
 ; FIXME: Should be able to use s_mov_b64
 ; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64:
-; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}}
-; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], -4{{$}}
 ; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
 define amdgpu_kernel void @inline_sreg_constraint_imm_i64() {
   tail call void asm sideeffect "; use $0", "s"(i64 -4)
@@ -74,3 +73,120 @@
   tail call void asm sideeffect "; use $0", "s"(double 1.0)
   ret void
 }
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m4:
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], -4{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 -4)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_4_0:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 4.0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_4_0() {
+  tail call void asm sideeffect "; use $0", "s"(i64 4647714815446351872)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m4_0:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -4.0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m4_0() {
+  tail call void asm sideeffect "; use $0", "s"(i64 13871086852301127680)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1:
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1() {
+  tail call void asm sideeffect "; use $0", "s"(i64 1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_4_m1:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -1{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 4.0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_4_m1() {
+  tail call void asm sideeffect "; use $0", "s"(i64 4647714819741319167)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m1_4:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 4.0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m1_4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 18446744070496714752)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m1_m4:
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], 0xffffffffc0800000{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m1_m4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 18446744072644198400)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1_4:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 4.0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1_4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 5377097728)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1_m4:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4.0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1_m4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 7524581376)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0x42c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 1120403456)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0xc2c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 3267887104)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m1_m100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0xc2c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m1_m100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 18446744072682471424)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1_m100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0xc2c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1_m100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 7562854400)
+  ret void
+}
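The constants in these tests are chosen so each 32-bit half is interesting on
its own (an f32 inline value, 0, 1, or -1). A sketch that decomposes them the
way the checks do (an assumption: plain C++, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Prints the halves the two s_mov_b32 would materialize, and whether the
    // value sign-extends from 32 bits, the case a single s_mov_b64 with a
    // 32-bit literal can cover.
    void describe(uint64_t Imm) {
      uint32_t Lo = (uint32_t)Imm;
      uint32_t Hi = (uint32_t)(Imm >> 32);
      bool SExt32 = (int64_t)Imm == (int64_t)(int32_t)Lo; // isInt<32>
      printf("0x%016llx: lo=0x%08x hi=0x%08x sext32=%d\n",
             (unsigned long long)Imm, Lo, Hi, SExt32);
    }

    // describe(18446744072644198400u): lo=0xc0800000 hi=0xffffffff sext32=1,
    //   hence the single s_mov_b64 in inline_sreg_constraint_imm_i64_m1_m4.
    // describe(4647714815446351872):   lo=0x00000000 hi=0x40800000 sext32=0,
    //   hence the s_mov_b32 pair in inline_sreg_constraint_imm_i64_4_0.
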
Index: llvm/test/CodeGen/AMDGPU/shl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/shl.ll
+++ llvm/test/CodeGen/AMDGPU/shl.ll
@@ -426,9 +426,7 @@
 ; low 32-bits, which is not a valid 64-bit inline immediate.
 
 ; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_4.0_i64:
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
-; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 0x40800000, s{{[0-9]+}}
 define amdgpu_kernel void @s_shl_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %shl = shl i64 1082130432, %a
   store i64 %shl, i64 addrspace(1)* %out, align 8
@@ -437,10 +435,7 @@
 
 ; FIXME: Copy of -1 register
 ; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_neg_4.0_i64:
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
-; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
-; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}, s{{[0-9]+}}
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 0xffffffffc0800000, s{{[0-9]+}}
 define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %shl = shl i64 -1065353216, %a
   store i64 %shl, i64 addrspace(1)* %out, align 8
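
Net effect on the shl cases, with the assembly in the comments paraphrased
from the FileCheck patterns above rather than from generated output (a
sketch, not verified codegen):

    // before: s_mov_b32  s[K_LO], 4.0        ; build the 64-bit constant
    //         s_mov_b32  s[K_HI], 0
    //         s_lshl_b64 s[...], s[K_LO:K_HI], s[...]
    // after:  s_lshl_b64 s[...], 0x40800000, s[...]

The fold is sound because 1082130432 (0x40800000, the bit pattern of 4.0f)
passes the isUInt<32> side of the new checks, while -1065353216 prints as
0xffffffffc0800000 because it passes the isInt<32> side, matching the widened
assert in AMDGPUInstPrinter.cpp.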