Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -194,6 +194,18 @@
   return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
 }
 
+/// \returns true if the constant in \p Src should be replaced with a bitreverse
+/// of an inline immediate.
+static bool isReverseInlineImm(const SIInstrInfo *TII,
+                               const MachineOperand &Src,
+                               int32_t &ReverseImm) {
+  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src, 4))
+    return false;
+
+  ReverseImm = reverseBits(static_cast<uint32_t>(Src.getImm()));
+  return ReverseImm >= -16 && ReverseImm <= 64;
+}
+
 /// Copy implicit register operands from specified instruction to this
 /// instruction that are not part of the instruction definition.
 static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -240,14 +252,11 @@
       MachineOperand &Src = MI.getOperand(1);
       if (Src.isImm() &&
           TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
-        int64_t Imm = Src.getImm();
-        if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
-          int32_t ReverseImm = reverseBits(static_cast<uint32_t>(Imm));
-          if (ReverseImm >= -16 && ReverseImm <= 64) {
-            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
-            Src.setImm(ReverseImm);
-            continue;
-          }
+        int32_t ReverseImm;
+        if (isReverseInlineImm(TII, Src, ReverseImm)) {
+          MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
+          Src.setImm(ReverseImm);
+          continue;
         }
       }
     }
@@ -318,10 +327,17 @@
 
       // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
       if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
-        const MachineOperand &Src = MI.getOperand(1);
+        MachineOperand &Src = MI.getOperand(1);
 
-        if (Src.isImm() && isKImmOperand(TII, Src))
-          MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+        if (Src.isImm()) {
+          int32_t ReverseImm;
+          if (isKImmOperand(TII, Src))
+            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
+            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
+            Src.setImm(ReverseImm);
+          }
+        }
 
         continue;
       }
Index: test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
===================================================================
--- test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
+++ test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
@@ -156,3 +156,66 @@
   store i64 508, i64 addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: {{^}}s_materialize_0_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0{{$}}
+define void @s_materialize_0_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_1_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 1{{$}}
+define void @s_materialize_1_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_neg1_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, -1{{$}}
+define void @s_materialize_neg1_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_signbit_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, 1{{$}}
+define void @s_materialize_signbit_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -2147483648)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_64_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, 64{{$}}
+define void @s_materialize_rev_64_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 33554432)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_65_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0x82000000{{$}}
+define void @s_materialize_rev_65_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -2113929216)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_neg16_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, -16{{$}}
+define void @s_materialize_rev_neg16_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 268435455)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_neg17_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0xf7ffffff{{$}}
+define void @s_materialize_rev_neg17_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -134217729)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_1.0_i32:
+; GCN: s_movk_i32 s{{[0-9]+}}, 0x1fc{{$}}
+define void @s_materialize_rev_1.0_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 508)
+  ret void
+}
Index: test/CodeGen/AMDGPU/fcopysign.f32.ll
===================================================================
--- test/CodeGen/AMDGPU/fcopysign.f32.ll
+++ test/CodeGen/AMDGPU/fcopysign.f32.ll
@@ -1,8 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-
 declare float @llvm.copysign.f32(float, float) nounwind readnone
 declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
@@ -15,7 +14,7 @@
 ; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30
 ; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
 ; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
 ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
Index: test/CodeGen/AMDGPU/fcopysign.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fcopysign.f64.ll
+++ test/CodeGen/AMDGPU/fcopysign.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 
 declare double @llvm.copysign.f64(double, double) nounwind readnone
@@ -12,7 +12,7 @@
 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
 ; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
 ; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
 ; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
 ; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
Index: test/CodeGen/AMDGPU/llvm.round.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: {{^}}round_f64:
 ; SI: s_endpgm
@@ -20,7 +20,7 @@
 
 ; SI-DAG: v_cmp_eq_i32
 
-; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
+; SI-DAG: s_brev_b32 [[BFIMASK:s[0-9]+]], -2{{$}}
 
 ; SI-DAG: v_cmp_gt_i32
 ; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
Index: test/CodeGen/AMDGPU/llvm.round.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.round.ll
+++ test/CodeGen/AMDGPU/llvm.round.ll
@@ -4,7 +4,7 @@
 
 ; FUNC-LABEL: {{^}}round_f32:
 ; SI-DAG: s_load_dword [[SX:s[0-9]+]]
-; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
+; SI-DAG: s_brev_b32 [[K:s[0-9]+]], -2{{$}}
 ; SI-DAG: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
 ; SI-DAG: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
 ; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
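
For reference, the condition the patch introduces can be reproduced outside the pass. Below is a minimal standalone C++ sketch (not part of the patch; reverseBits32 and fitsAsReversedInline are illustrative names, not LLVM APIs) of the isReverseInlineImm() check: a 32-bit mov of a literal is only rewritten to s_brev_b32 / v_bfrev_b32 when the bit-reversed value lands in the integer inline-constant range [-16, 64], which is why 0x7fffffff (reversed: -2) becomes free while 0x82000000 (reversed: 65) keeps its literal in the tests above.

#include <cstdint>
#include <cstdio>

// Reverse the bit order of a 32-bit value (bit 0 swaps with bit 31, etc.).
static uint32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

// True if Imm, once bit-reversed, lands in the integer inline-constant range
// [-16, 64] and can therefore be materialized with s_brev_b32 / v_bfrev_b32
// instead of a 32-bit literal.
static bool fitsAsReversedInline(int32_t Imm, int32_t &ReverseImm) {
  ReverseImm = static_cast<int32_t>(reverseBits32(static_cast<uint32_t>(Imm)));
  return ReverseImm >= -16 && ReverseImm <= 64;
}

int main() {
  int32_t Rev;
  // 0x7fffffff reverses to 0xfffffffe == -2, so the fcopysign/round tests now
  // expect "s_brev_b32 ..., -2" instead of "s_mov_b32 ..., 0x7fffffff".
  printf("%d %d\n", fitsAsReversedInline(0x7fffffff, Rev), Rev);            // 1 -2
  // 0x82000000 reverses to 65, just outside [-16, 64], so the literal stays
  // (see s_materialize_rev_65_i32).
  printf("%d %d\n", fitsAsReversedInline(INT32_MIN + 0x2000000, Rev), Rev); // 0 65
  return 0;
}

Running the sketch prints "1 -2" and "0 65", matching the s_brev_b32 operands and kept literals checked by the updated tests.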