Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h @@ -40,21 +40,29 @@ class MemIntrinsic; class MemSetInst; -/// \brief Assign a complexity or rank value to LLVM Values. +/// Assign a complexity or rank value to LLVM Values. This is used to reduce +/// the amount of pattern matching needed for compares and commutative +/// instructions. For example, if we have: +/// icmp ugt X, Constant +/// or +/// xor (add X, Constant), cast Z +/// +/// We do not have to consider the commuted variants of these patterns because +/// canonicalization based on complexity guarantees the above ordering. /// /// This routine maps IR values to various complexity ranks: /// 0 -> undef /// 1 -> Constants /// 2 -> Other non-instructions /// 3 -> Arguments -/// 3 -> Unary operations -/// 4 -> Other instructions +/// 4 -> Cast and (f)neg/not instructions +/// 5 -> Other instructions static inline unsigned getComplexity(Value *V) { if (isa<Instruction>(V)) { - if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || - BinaryOperator::isNot(V)) - return 3; - return 4; + if (isa<CastInst>(V) || BinaryOperator::isNeg(V) || + BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V)) + return 4; + return 5; } if (isa<Argument>(V)) return 3; Index: llvm/trunk/test/Transforms/InstCombine/add.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/add.ll +++ llvm/trunk/test/Transforms/InstCombine/add.ll @@ -100,7 +100,7 @@ define i1 @test10(i8 %A, i8 %b) { ; CHECK-LABEL: @test10( ; CHECK-NEXT: [[B:%.*]] = sub i8 0, %b -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 %A, [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[B]], %A ; CHECK-NEXT: ret i1 [[C]] ; %B = add i8 %A, %b @@ -112,7 +112,7 @@ define <2 x i1> @test10vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test10vec( ; 
CHECK-NEXT: [[C:%.*]] = sub <2 x i8> zeroinitializer, %b -; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> %a, [[C]] +; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> [[C]], %a ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = add <2 x i8> %a, %b Index: llvm/trunk/test/Transforms/InstCombine/and.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/and.ll +++ llvm/trunk/test/Transforms/InstCombine/and.ll @@ -176,7 +176,7 @@ define i8 @test17(i8 %X, i8 %Y) { ; CHECK-LABEL: @test17( ; CHECK-NEXT: [[Y_NOT:%.*]] = xor i8 %Y, -1 -; CHECK-NEXT: [[D:%.*]] = or i8 %X, [[Y_NOT]] +; CHECK-NEXT: [[D:%.*]] = or i8 [[Y_NOT]], %X ; CHECK-NEXT: ret i8 [[D]] ; %B = xor i8 %X, -1 Index: llvm/trunk/test/Transforms/InstCombine/apint-sub.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/apint-sub.ll +++ llvm/trunk/test/Transforms/InstCombine/apint-sub.ll @@ -50,7 +50,7 @@ define i57 @test6(i57 %A, i57 %B) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i57 %B, -1 -; CHECK-NEXT: [[D:%.*]] = and i57 %A, [[B_NOT]] +; CHECK-NEXT: [[D:%.*]] = and i57 [[B_NOT]], %A ; CHECK-NEXT: ret i57 [[D]] ; %C = and i57 %A, %B Index: llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll +++ llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll @@ -144,7 +144,7 @@ ret i8* %n ; CHECK-LABEL: compare_ret_escape ; CHECK: %cmp = icmp eq i8* %n, %c -; CHECK: %cmp2 = icmp eq i32* %bc, %lgp +; CHECK: %cmp2 = icmp eq i32* %lgp, %bc } ; The malloc call for %m cannot be elided since it is used in the call to function f. 
Index: llvm/trunk/test/Transforms/InstCombine/icmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/icmp.ll +++ llvm/trunk/test/Transforms/InstCombine/icmp.ll @@ -918,7 +918,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %i to i16 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 %j to i16 ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], [[GEP1_IDX]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]] ; CHECK-NEXT: ret i1 [[TMP3]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* @@ -949,7 +949,7 @@ ; CHECK-LABEL: @test60_addrspacecast_smaller( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 %i, 2 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %j to i16 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[GEP1_IDX]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %bit = addrspacecast i8* %foo to i32 addrspace(1)* @@ -981,7 +981,7 @@ ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i64 %i ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* %foo, i64 %j ; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8* -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[CAST1]], [[GEP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]] ; CHECK-NEXT: ret i1 [[CMP]] ; %bit = bitcast i8* %foo to i32* @@ -999,7 +999,7 @@ ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 %i ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* %foo, i16 %j ; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)* -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 addrspace(1)* [[CAST1]], [[GEP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]] ; CHECK-NEXT: ret i1 [[CMP]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* @@ -2807,7 +2807,7 @@ ; CHECK-LABEL: @cmp_sle_rhs_inc( ; CHECK-NEXT: [[CONV:%.*]] = fptosi 
float %x to i32 ; CHECK-NEXT: [[INC:%.*]] = add -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV]], [[INC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[INC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 @@ -2820,7 +2820,7 @@ ; CHECK-LABEL: @cmp_ule_rhs_inc( ; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32 ; CHECK-NEXT: [[INC:%.*]] = add -; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[CONV]], [[INC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 @@ -2833,7 +2833,7 @@ ; CHECK-LABEL: @cmp_slt_rhs_dec( ; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32 ; CHECK-NEXT: [[DEC:%.*]] = {{add|sub}} -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[DEC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 @@ -2846,7 +2846,7 @@ ; CHECK-LABEL: @cmp_ult_rhs_dec( ; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32 ; CHECK-NEXT: [[DEC:%.*]] = {{add|sub}} -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[DEC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[DEC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 Index: llvm/trunk/test/Transforms/InstCombine/narrow.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/narrow.ll +++ llvm/trunk/test/Transforms/InstCombine/narrow.ll @@ -212,8 +212,7 @@ ; FIXME: ; Narrowing should work with an 'xor' and is not limited to bool types. -; FIXME: -; We should either canonicalize based on complexity or enhance the pattern matching to catch this commuted variant. +; Test that commuting the xor operands does not inhibit optimization. 
define i32 @shrinkLogicAndPhi2(i8 %x, i1 %cond) { ; CHECK-LABEL: @shrinkLogicAndPhi2( @@ -224,7 +223,7 @@ ; CHECK: endif: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ] ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[ZEXT]], [[PHI]] +; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]] ; CHECK-NEXT: ret i32 [[LOGIC]] ; entry: Index: llvm/trunk/test/Transforms/InstCombine/or.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/or.ll +++ llvm/trunk/test/Transforms/InstCombine/or.ll @@ -490,7 +490,7 @@ ; CHECK-LABEL: @orsext_to_sel_multi_use( ; CHECK-NEXT: [[SEXT:%.*]] = sext i1 %y to i32 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEXT]], %x -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEXT]], [[OR]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[OR]], [[SEXT]] ; CHECK-NEXT: ret i32 [[ADD]] ; %sext = sext i1 %y to i32 Index: llvm/trunk/test/Transforms/InstCombine/select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/select.ll +++ llvm/trunk/test/Transforms/InstCombine/select.ll @@ -190,7 +190,7 @@ define i1 @test63(i1 %A, i1 %B) { ; CHECK-LABEL: @test63( ; CHECK-NEXT: [[NOT:%.*]] = xor i1 %A, true -; CHECK-NEXT: [[C:%.*]] = or i1 %B, [[NOT]] +; CHECK-NEXT: [[C:%.*]] = or i1 [[NOT]], %B ; CHECK-NEXT: ret i1 [[C]] ; %not = xor i1 %A, true @@ -201,7 +201,7 @@ define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test63vec( ; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> %A, <i1 true, i1 true> -; CHECK-NEXT: [[C:%.*]] = or <2 x i1> %B, [[NOT]] +; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[NOT]], %B ; CHECK-NEXT: ret <2 x i1> [[C]] ; %not = xor <2 x i1> %A, <i1 true, i1 true> Index: llvm/trunk/test/Transforms/InstCombine/sub.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/sub.ll +++ llvm/trunk/test/Transforms/InstCombine/sub.ll @@ -54,7 +54,7 @@ define i32 
@test6(i32 %A, i32 %B) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %B, -1 -; CHECK-NEXT: [[D:%.*]] = and i32 %A, [[B_NOT]] +; CHECK-NEXT: [[D:%.*]] = and i32 [[B_NOT]], %A ; CHECK-NEXT: ret i32 [[D]] ; %C = and i32 %A, %B @@ -617,7 +617,7 @@ define i32 @test46(i32 %x, i32 %y) { ; CHECK-LABEL: @test46( ; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 %x, -1 -; CHECK-NEXT: [[SUB:%.*]] = and i32 %y, [[X_NOT]] +; CHECK-NEXT: [[SUB:%.*]] = and i32 [[X_NOT]], %y ; CHECK-NEXT: ret i32 [[SUB]] ; %or = or i32 %x, %y Index: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -67,7 +67,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] ; CHECK-NEXT: ret i64 [[TMP15]] ; %v00 = insertelement <4 x float> undef, float %f, i32 0 Index: llvm/trunk/test/Transforms/InstCombine/vec_sext.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_sext.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_sext.ll @@ -6,7 +6,7 @@ ; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, %a ; CHECK-NEXT: [[B_LOBIT:%.*]] = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> ; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[B_LOBIT]], <i32 -1, i32 -1, i32 -1, i32 -1> -; CHECK-NEXT: [[T2:%.*]] = and <4 x i32> %a, [[T1]] +; CHECK-NEXT: [[T2:%.*]] = and <4 x i32> [[T1]], %a ; CHECK-NEXT: [[T3:%.*]] = and <4 x i32> [[B_LOBIT]], [[SUB]] ; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]] ; CHECK-NEXT: ret <4 x i32> [[COND]] Index: llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll =================================================================== 
--- llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll @@ -781,7 +781,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] ; CHECK-NEXT: ret i64 [[TMP15]] ; %v00 = insertelement <4 x float> undef, float %f, i32 0 @@ -861,7 +861,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] ; CHECK-NEXT: ret i64 [[TMP15]] ; %v00 = insertelement <4 x float> undef, float %f, i32 0 Index: llvm/trunk/test/Transforms/InstCombine/xor.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/xor.ll +++ llvm/trunk/test/Transforms/InstCombine/xor.ll @@ -321,7 +321,7 @@ define i32 @test26(i32 %a, i32 %b) { ; CHECK-LABEL: @test26( -; CHECK-NEXT: [[T4:%.*]] = and i32 %a, %b +; CHECK-NEXT: [[T4:%.*]] = and i32 %b, %a ; CHECK-NEXT: ret i32 [[T4]] ; %b2 = xor i32 %b, -1 Index: llvm/trunk/test/Transforms/InstCombine/xor2.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/xor2.ll +++ llvm/trunk/test/Transforms/InstCombine/xor2.ll @@ -110,7 +110,7 @@ define i32 @test7(i32 %a, i32 %b) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %b, -1 -; CHECK-NEXT: [[XOR:%.*]] = or i32 %a, [[B_NOT]] +; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], %a ; CHECK-NEXT: ret i32 [[XOR]] ; %or = or i32 %a, %b @@ -123,7 +123,7 @@ define i32 @test8(i32 %a, i32 %b) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %b, -1 -; CHECK-NEXT: 
[[XOR:%.*]] = or i32 %a, [[B_NOT]] +; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], %a ; CHECK-NEXT: ret i32 [[XOR]] ; %neg = xor i32 %a, -1