Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h @@ -40,21 +40,29 @@ class MemIntrinsic; class MemSetInst; -/// \brief Assign a complexity or rank value to LLVM Values. +/// Assign a complexity or rank value to LLVM Values. This is used to reduce +/// the amount of pattern matching needed for compares and commutative +/// instructions. For example, if we have: +/// icmp ugt X, Constant +/// or +/// xor (add X, Constant), cast Z +/// +/// We do not have to consider the commuted variants of these patterns because +/// canonicalization based on complexity guarantees the above ordering. /// /// This routine maps IR values to various complexity ranks: /// 0 -> undef /// 1 -> Constants /// 2 -> Other non-instructions /// 3 -> Arguments -/// 3 -> Unary operations -/// 4 -> Other instructions +/// 4 -> Cast and (f)neg/not instructions +/// 5 -> Other instructions static inline unsigned getComplexity(Value *V) { if (isa<Instruction>(V)) { - if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || - BinaryOperator::isNot(V)) - return 3; - return 4; + if (isa<CastInst>(V) || BinaryOperator::isNeg(V) || + BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V)) + return 4; + return 5; } if (isa<Argument>(V)) return 3; Index: llvm/trunk/test/Transforms/InstCombine/add.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/add.ll +++ llvm/trunk/test/Transforms/InstCombine/add.ll @@ -100,7 +100,7 @@ define i1 @test10(i8 %A, i8 %b) { ; CHECK-LABEL: @test10( ; CHECK-NEXT: [[B:%.*]] = sub i8 0, %b -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 %A, [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[B]], %A ; CHECK-NEXT: ret i1 [[C]] ; %B = add i8 %A, %b @@ -112,7 +112,7 @@ define <2 x i1> @test10vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test10vec( ; 
CHECK-NEXT: [[C:%.*]] = sub <2 x i8> zeroinitializer, %b -; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> %a, [[C]] +; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> [[C]], %a ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = add <2 x i8> %a, %b Index: llvm/trunk/test/Transforms/InstCombine/and.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/and.ll +++ llvm/trunk/test/Transforms/InstCombine/and.ll @@ -176,7 +176,7 @@ define i8 @test17(i8 %X, i8 %Y) { ; CHECK-LABEL: @test17( ; CHECK-NEXT: [[Y_NOT:%.*]] = xor i8 %Y, -1 -; CHECK-NEXT: [[D:%.*]] = or i8 %X, [[Y_NOT]] +; CHECK-NEXT: [[D:%.*]] = or i8 [[Y_NOT]], %X ; CHECK-NEXT: ret i8 [[D]] ; %B = xor i8 %X, -1 Index: llvm/trunk/test/Transforms/InstCombine/apint-sub.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/apint-sub.ll +++ llvm/trunk/test/Transforms/InstCombine/apint-sub.ll @@ -50,7 +50,7 @@ define i57 @test6(i57 %A, i57 %B) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i57 %B, -1 -; CHECK-NEXT: [[D:%.*]] = and i57 %A, [[B_NOT]] +; CHECK-NEXT: [[D:%.*]] = and i57 [[B_NOT]], %A ; CHECK-NEXT: ret i57 [[D]] ; %C = and i57 %A, %B Index: llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll +++ llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll @@ -144,7 +144,7 @@ ret i8* %n ; CHECK-LABEL: compare_ret_escape ; CHECK: %cmp = icmp eq i8* %n, %c -; CHECK: %cmp2 = icmp eq i32* %bc, %lgp +; CHECK: %cmp2 = icmp eq i32* %lgp, %bc } ; The malloc call for %m cannot be elided since it is used in the call to function f. 
Index: llvm/trunk/test/Transforms/InstCombine/icmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/icmp.ll +++ llvm/trunk/test/Transforms/InstCombine/icmp.ll @@ -918,7 +918,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %i to i16 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 %j to i16 ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], [[GEP1_IDX]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]] ; CHECK-NEXT: ret i1 [[TMP3]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* @@ -949,7 +949,7 @@ ; CHECK-LABEL: @test60_addrspacecast_smaller( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 %i, 2 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %j to i16 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[GEP1_IDX]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %bit = addrspacecast i8* %foo to i32 addrspace(1)* @@ -981,7 +981,7 @@ ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i64 %i ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* %foo, i64 %j ; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8* -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[CAST1]], [[GEP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]] ; CHECK-NEXT: ret i1 [[CMP]] ; %bit = bitcast i8* %foo to i32* @@ -999,7 +999,7 @@ ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 %i ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* %foo, i16 %j ; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)* -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 addrspace(1)* [[CAST1]], [[GEP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]] ; CHECK-NEXT: ret i1 [[CMP]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* @@ -2807,7 +2807,7 @@ ; CHECK-LABEL: @cmp_sle_rhs_inc( ; CHECK-NEXT: [[CONV:%.*]] = fptosi 
float %x to i32 ; CHECK-NEXT: [[INC:%.*]] = add -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV]], [[INC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[INC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 @@ -2820,7 +2820,7 @@ ; CHECK-LABEL: @cmp_ule_rhs_inc( ; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32 ; CHECK-NEXT: [[INC:%.*]] = add -; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[CONV]], [[INC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 @@ -2833,7 +2833,7 @@ ; CHECK-LABEL: @cmp_slt_rhs_dec( ; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32 ; CHECK-NEXT: [[DEC:%.*]] = {{add|sub}} -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[DEC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 @@ -2846,7 +2846,7 @@ ; CHECK-LABEL: @cmp_ult_rhs_dec( ; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32 ; CHECK-NEXT: [[DEC:%.*]] = {{add|sub}} -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[DEC]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[DEC]], [[CONV]] ; CHECK-NEXT: ret i1 [[CMP]] ; %conv = fptosi float %x to i32 Index: llvm/trunk/test/Transforms/InstCombine/narrow.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/narrow.ll +++ llvm/trunk/test/Transforms/InstCombine/narrow.ll @@ -212,8 +212,7 @@ ; FIXME: ; Narrowing should work with an 'xor' and is not limited to bool types. -; FIXME: -; We should either canonicalize based on complexity or enhance the pattern matching to catch this commuted variant. +; Test that commuting the xor operands does not inhibit optimization. 
define i32 @shrinkLogicAndPhi2(i8 %x, i1 %cond) { ; CHECK-LABEL: @shrinkLogicAndPhi2( @@ -224,7 +223,7 @@ ; CHECK: endif: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ] ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[ZEXT]], [[PHI]] +; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]] ; CHECK-NEXT: ret i32 [[LOGIC]] ; entry: Index: llvm/trunk/test/Transforms/InstCombine/or.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/or.ll +++ llvm/trunk/test/Transforms/InstCombine/or.ll @@ -490,7 +490,7 @@ ; CHECK-LABEL: @orsext_to_sel_multi_use( ; CHECK-NEXT: [[SEXT:%.*]] = sext i1 %y to i32 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEXT]], %x -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEXT]], [[OR]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[OR]], [[SEXT]] ; CHECK-NEXT: ret i32 [[ADD]] ; %sext = sext i1 %y to i32 Index: llvm/trunk/test/Transforms/InstCombine/select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/select.ll +++ llvm/trunk/test/Transforms/InstCombine/select.ll @@ -190,7 +190,7 @@ define i1 @test63(i1 %A, i1 %B) { ; CHECK-LABEL: @test63( ; CHECK-NEXT: [[NOT:%.*]] = xor i1 %A, true -; CHECK-NEXT: [[C:%.*]] = or i1 %B, [[NOT]] +; CHECK-NEXT: [[C:%.*]] = or i1 [[NOT]], %B ; CHECK-NEXT: ret i1 [[C]] ; %not = xor i1 %A, true @@ -201,7 +201,7 @@ define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test63vec( ; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> %A, <i1 true, i1 true> -; CHECK-NEXT: [[C:%.*]] = or <2 x i1> %B, [[NOT]] +; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[NOT]], %B ; CHECK-NEXT: ret <2 x i1> [[C]] ; %not = xor <2 x i1> %A, <i1 true, i1 true> Index: llvm/trunk/test/Transforms/InstCombine/sub.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/sub.ll +++ llvm/trunk/test/Transforms/InstCombine/sub.ll @@ -54,7 +54,7 @@ define i32 
@test6(i32 %A, i32 %B) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %B, -1 -; CHECK-NEXT: [[D:%.*]] = and i32 %A, [[B_NOT]] +; CHECK-NEXT: [[D:%.*]] = and i32 [[B_NOT]], %A ; CHECK-NEXT: ret i32 [[D]] ; %C = and i32 %A, %B @@ -617,7 +617,7 @@ define i32 @test46(i32 %x, i32 %y) { ; CHECK-LABEL: @test46( ; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 %x, -1 -; CHECK-NEXT: [[SUB:%.*]] = and i32 %y, [[X_NOT]] +; CHECK-NEXT: [[SUB:%.*]] = and i32 [[X_NOT]], %y ; CHECK-NEXT: ret i32 [[SUB]] ; %or = or i32 %x, %y Index: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -67,7 +67,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] ; CHECK-NEXT: ret i64 [[TMP15]] ; %v00 = insertelement <4 x float> undef, float %f, i32 0 Index: llvm/trunk/test/Transforms/InstCombine/vec_sext.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_sext.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_sext.ll @@ -6,7 +6,7 @@ ; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, %a ; CHECK-NEXT: [[B_LOBIT:%.*]] = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> ; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[B_LOBIT]], <i32 -1, i32 -1, i32 -1, i32 -1> -; CHECK-NEXT: [[T2:%.*]] = and <4 x i32> %a, [[T1]] +; CHECK-NEXT: [[T2:%.*]] = and <4 x i32> [[T1]], %a ; CHECK-NEXT: [[T3:%.*]] = and <4 x i32> [[B_LOBIT]], [[SUB]] ; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]] ; CHECK-NEXT: ret <4 x i32> [[COND]] Index: llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll =================================================================== 
--- llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll @@ -781,7 +781,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] ; CHECK-NEXT: ret i64 [[TMP15]] ; %v00 = insertelement <4 x float> undef, float %f, i32 0 @@ -861,7 +861,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] ; CHECK-NEXT: ret i64 [[TMP15]] ; %v00 = insertelement <4 x float> undef, float %f, i32 0 Index: llvm/trunk/test/Transforms/InstCombine/xor.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/xor.ll +++ llvm/trunk/test/Transforms/InstCombine/xor.ll @@ -321,7 +321,7 @@ define i32 @test26(i32 %a, i32 %b) { ; CHECK-LABEL: @test26( -; CHECK-NEXT: [[T4:%.*]] = and i32 %a, %b +; CHECK-NEXT: [[T4:%.*]] = and i32 %b, %a ; CHECK-NEXT: ret i32 [[T4]] ; %b2 = xor i32 %b, -1 Index: llvm/trunk/test/Transforms/InstCombine/xor2.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/xor2.ll +++ llvm/trunk/test/Transforms/InstCombine/xor2.ll @@ -110,7 +110,7 @@ define i32 @test7(i32 %a, i32 %b) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %b, -1 -; CHECK-NEXT: [[XOR:%.*]] = or i32 %a, [[B_NOT]] +; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], %a ; CHECK-NEXT: ret i32 [[XOR]] ; %or = or i32 %a, %b @@ -123,7 +123,7 @@ define i32 @test8(i32 %a, i32 %b) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %b, -1 -; CHECK-NEXT: 
[[XOR:%.*]] = or i32 %a, [[B_NOT]] +; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], %a ; CHECK-NEXT: ret i32 [[XOR]] ; %neg = xor i32 %a, -1