Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18276,9 +18276,17 @@
   }
 
   // fold select C, 16, 0 -> shl C, 4
-  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
-      TLI.getBooleanContents(N0.getValueType()) ==
-          TargetLowering::ZeroOrOneBooleanContent) {
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+  if ((Fold || Swap) && TLI.getBooleanContents(N0.getValueType()) ==
+                            TargetLowering::ZeroOrOneBooleanContent) {
+
+    if (Swap) {
+      CC = ISD::getSetCCInverse(CC, N0.getValueType().isInteger());
+      std::swap(N2C, N3C);
+    }
 
   // If the caller doesn't want us to simplify this into a zext of a compare,
   // don't do it.
Index: test/CodeGen/AArch64/select_cc.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/select_cc.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=arm64 | FileCheck %s
+
+; CHECK-LABEL: select_ogt_float
+; CHECK: fcmp s0, s1
+; CHECK-NEXT: cset w8, gt
+; CHECK-NEXT: lsl x0, x8, #2
+define i64 @select_ogt_float(float %a, float %b) {
+entry:
+  %cc = fcmp ogt float %a, %b
+  %sel = select i1 %cc, i64 4, i64 0
+  ret i64 %sel
+}
+
+; CHECK-LABEL: select_ule_float_inverse
+; CHECK: fcmp s0, s1
+; CHECK-NEXT: cset w8, gt
+; CHECK-NEXT: lsl x0, x8, #2
+define i64 @select_ule_float_inverse(float %a, float %b) {
+entry:
+  %cc = fcmp ule float %a, %b
+  %sel = select i1 %cc, i64 0, i64 4
+  ret i64 %sel
+}
+
+; CHECK-LABEL: select_eq_i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: lsl x0, x8, #2
+define i64 @select_eq_i32(i32 %a, i32 %b) {
+entry:
+  %cc = icmp eq i32 %a, %b
+  %sel = select i1 %cc, i64 4, i64 0
+  ret i64 %sel
+}
+
+; CHECK-LABEL: select_ne_i32_inverse
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: lsl x0, x8, #2
+define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
+entry:
+  %cc = icmp ne i32 %a, %b
+  %sel = select i1 %cc, i64 0, i64 4
+  ret i64 %sel
+}
Index: test/CodeGen/Thumb/branchless-cmp.ll
===================================================================
--- test/CodeGen/Thumb/branchless-cmp.ll
+++ test/CodeGen/Thumb/branchless-cmp.ll
@@ -77,8 +77,6 @@
 ; CHECK-NEXT: lsls r0, r1, #2
 }
 
-; FIXME: This one hasn't changed actually
-; but could look like test3b
 define i32 @test4a(i32 %a, i32 %b) {
 entry:
   %cmp = icmp ne i32 %a, %b
@@ -86,13 +84,11 @@
   ret i32 %cond
 ; CHECK-LABEL: test4a:
 ; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK: mov r2, r0
-; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: movs r3, #4
-; CHECK-NEXT: cmp r2, r1
-; CHECK-NEXT: bne .[[BRANCH:[A-Z0-9_]+]]
-; CHECK: mov r0, r3
-; CHECK: .[[BRANCH]]:
+; CHECK: subs r0, r0, r1
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: subs r1, r1, r0
+; CHECK-NEXT: adcs r1, r0
+; CHECK-NEXT: lsls r0, r1, #2
 }
 
 define i32 @test4b(i32 %a, i32 %b) {