Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1167,6 +1167,23 @@ if (Instruction *I = canonicalizeSelectToShuffle(SI)) return I; + // Canonicalize a one-use integer compare with a non-canonical predicate by + // inverting the predicate and swapping the select operands. This matches a + // compare canonicalization for conditional branches. + // TODO: Should we do the same for FP compares? + CmpInst::Predicate Pred; + if (match(CondVal, m_OneUse(m_ICmp(Pred, m_Value(), m_Value()))) && + !isCanonicalPredicate(Pred)) { + // Swap true/false values and condition. + CmpInst *Cond = cast<CmpInst>(CondVal); + Cond->setPredicate(CmpInst::getInversePredicate(Pred)); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + SI.swapProfMetadata(); + Worklist.Add(Cond); + return &SI; + } + if (SelType->getScalarType()->isIntegerTy(1) && TrueVal->getType() == CondVal->getType()) { if (match(TrueVal, m_One())) { Index: llvm/trunk/test/Transforms/InstCombine/ffs-1.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/ffs-1.ll +++ llvm/trunk/test/Transforms/InstCombine/ffs-1.ll @@ -150,8 +150,8 @@ ; ALL-LABEL: @test_simplify13( ; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) ; ALL-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1 -; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0 -; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 +; ALL-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0 +; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] ; ALL-NEXT: ret i32 [[TMP3]] ; %ret = call i32 @ffs(i32 %x) @@ -166,8 +166,8 @@ ; TARGET-LABEL: @test_simplify14( ; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) ; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1 -; 
TARGET-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0 -; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 +; TARGET-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0 +; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] ; TARGET-NEXT: ret i32 [[TMP3]] ; %ret = call i32 @ffsl(i32 %x) @@ -183,8 +183,8 @@ ; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true) ; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[CTTZ]], 1 ; TARGET-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; TARGET-NEXT: [[TMP3:%.*]] = icmp ne i64 %x, 0 -; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 +; TARGET-NEXT: [[TMP3:%.*]] = icmp eq i64 %x, 0 +; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]] ; TARGET-NEXT: ret i32 [[TMP4]] ; %ret = call i32 @ffsll(i64 %x) Index: llvm/trunk/test/Transforms/InstCombine/icmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/icmp.ll +++ llvm/trunk/test/Transforms/InstCombine/icmp.ll @@ -2423,8 +2423,8 @@ ; CHECK-LABEL: @f7( ; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 %a, %b ; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 511 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CMP:%.*]].mask, 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 10000, i32 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP_MASK]], 0 +; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 0, i32 10000 ; CHECK-NEXT: ret i32 [[S]] ; %sext = shl i32 %a, 23 Index: llvm/trunk/test/Transforms/InstCombine/logical-select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/logical-select.ll +++ llvm/trunk/test/Transforms/InstCombine/logical-select.ll @@ -62,19 +62,15 @@ ret i32 %t3 } -; TODO: For the next 4 tests, are there potential canonicalizations and/or folds for these -; in InstCombine? 
Independent of that, tests like this that may not show any transforms -; still have value because they can help identify conflicting canonicalization rules that -; lead to infinite looping. - ; PR32791 - https://bugs.llvm.org//show_bug.cgi?id=32791 -; Fold two selects with inverted predicates and zero operands. +; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this. + define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: @fold_inverted_icmp_preds( ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 %c, i32 0 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %a, %b -; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0 +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[OR]] ; @@ -86,12 +82,14 @@ ret i32 %or } +; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this. + define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: @fold_inverted_icmp_preds_reverse( ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 %c -; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %a, %b -; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[OR]] ; @@ -103,6 +101,8 @@ ret i32 %or } +; TODO: Should fcmp have the same sort of predicate canonicalization as icmp? 
+ define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) { ; CHECK-LABEL: @fold_inverted_fcmp_preds( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float %a, %b @@ -120,10 +120,12 @@ ret i32 %or } +; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this. + define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) { ; CHECK-LABEL: @fold_inverted_icmp_vector_preds( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> %a, %b -; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> %c, <2 x i32> zeroinitializer +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <2 x i32> %a, %b +; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> %c ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> %a, %b ; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> %d, <2 x i32> zeroinitializer ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]] Index: llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll +++ llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll @@ -93,14 +93,15 @@ ; negative test case (i.e. 
can not simplify) : ABS(MIN(NOT x,y)) define i32 @abs_of_min_of_not(i32 %x, i32 %y) { ; CHECK-LABEL: @abs_of_min_of_not( -; CHECK-NEXT: xor -; CHECK-NEXT: add -; CHECK-NEXT: icmp sge -; CHECK-NEXT: select -; CHECK-NEXT: icmp sgt -; CHECK-NEXT: sub -; CHECK-NEXT: select -; CHECK-NEXT: ret +; CHECK-NEXT: [[XORD:%.*]] = xor i32 %x, -1 +; CHECK-NEXT: [[YADD:%.*]] = add i32 %y, 2 +; CHECK-NEXT: [[COND_I:%.*]] = icmp slt i32 [[YADD]], [[XORD]] +; CHECK-NEXT: [[MIN:%.*]] = select i1 [[COND_I]], i32 [[YADD]], i32 [[XORD]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[MIN]], -1 +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[MIN]] +; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP2]], i32 [[MIN]], i32 [[SUB]] +; CHECK-NEXT: ret i32 [[ABS]] +; %xord = xor i32 %x, -1 %yadd = add i32 %y, 2 Index: llvm/trunk/test/Transforms/InstCombine/select-with-bitwise-ops.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ llvm/trunk/test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -104,10 +104,10 @@ define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8( -; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 8 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[Y]], i8 [[OR]] +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 1073741824 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i8 %y, 8 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[OR]], i8 %y ; CHECK-NEXT: ret i8 [[SELECT]] ; %and = and i32 %x, 1073741824 @@ -119,10 +119,10 @@ define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824( -; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 
1073741824 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] +; CHECK-NEXT: [[AND:%.*]] = and i8 %x, 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 1073741824 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 %y ; CHECK-NEXT: ret i32 [[SELECT]] ; %and = and i8 %x, 8 @@ -271,8 +271,8 @@ define i32 @test66(i64 %x) { ; CHECK-LABEL: @test66( ; CHECK-NEXT: [[TMP1:%.*]] = and i64 %x, 4294967296 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 40, i32 42 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40 ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = and i64 %x, 4294967296 @@ -376,10 +376,10 @@ define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) { ; CHECK-LABEL: @shift_xor_multiuse_or( -; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 2048 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 %y ; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]] ; CHECK-NEXT: ret i32 [[RES]] ; @@ -430,11 +430,11 @@ define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { ; CHECK-LABEL: @no_shift_xor_multiuse_cmp( -; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]] -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], %y +; 
CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 %w, i32 %z ; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]] ; CHECK-NEXT: ret i32 [[RES]] ; Index: llvm/trunk/test/Transforms/InstCombine/select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/select.ll +++ llvm/trunk/test/Transforms/InstCombine/select.ll @@ -1220,12 +1220,13 @@ } define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) { - ; CHECK-LABEL: @test_select_select0( - ; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1 - ; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2 - ; CHECK-NEXT: %[[C:.*]] = and i1 %[[C1]], %[[C0]] - ; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1 - ; CHECK-NEXT: ret i32 %[[SEL]] +; CHECK-LABEL: @test_select_select0( +; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 %a, %v1 +; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 %r1, i32 %r0 +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 %a, %v2 +; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 [[S0]], i32 %r1 +; CHECK-NEXT: ret i32 [[S1]] +; %c0 = icmp sge i32 %a, %v1 %s0 = select i1 %c0, i32 %r0, i32 %r1 %c1 = icmp slt i32 %a, %v2 @@ -1234,12 +1235,13 @@ } define i32 @test_select_select1(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) { - ; CHECK-LABEL: @test_select_select1( - ; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1 - ; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2 - ; CHECK-NEXT: %[[C:.*]] = or i1 %[[C1]], %[[C0]] - ; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1 - ; CHECK-NEXT: ret i32 %[[SEL]] +; CHECK-LABEL: @test_select_select1( +; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 %a, %v1 +; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 %r1, i32 %r0 +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 %a, %v2 +; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 %r0, i32 [[S0]] +; CHECK-NEXT: ret i32 [[S1]] +; %c0 = icmp sge i32 %a, %v1 %s0 = select i1 %c0, i32 %r0, i32 %r1 %c1 = icmp slt i32 %a, %v2 Index: 
llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll +++ llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll @@ -18,7 +18,7 @@ ;CHECK-LABEL: @function0( ;CHECK: load <4 x i32> -;CHECK: icmp sle <4 x i32> +;CHECK: icmp sgt <4 x i32> ;CHECK: mul <4 x i32> ;CHECK: add <4 x i32> ;CHECK: select <4 x i1> Index: llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll +++ llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -244,7 +244,7 @@ ; SGE -> SLT ; Turn this into a min reduction (select inputs are reversed). ; CHECK-LABEL: @sge_min_red( -; CHECK: icmp sge <2 x i32> +; CHECK: icmp slt <2 x i32> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp slt <2 x i32> @@ -273,7 +273,7 @@ ; SLE -> SGT ; Turn this into a max reduction (select inputs are reversed). ; CHECK-LABEL: @sle_min_red( -; CHECK: icmp sle <2 x i32> +; CHECK: icmp sgt <2 x i32> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp sgt <2 x i32> @@ -302,7 +302,7 @@ ; UGE -> ULT ; Turn this into a min reduction (select inputs are reversed). ; CHECK-LABEL: @uge_min_red( -; CHECK: icmp uge <2 x i32> +; CHECK: icmp ult <2 x i32> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ult <2 x i32> @@ -331,7 +331,7 @@ ; ULE -> UGT ; Turn this into a max reduction (select inputs are reversed). ; CHECK-LABEL: @ule_min_red( -; CHECK: icmp ule <2 x i32> +; CHECK: icmp ugt <2 x i32> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ugt <2 x i32>