Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,6 +54,36 @@
   return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
 }
 
+/// Fold
+///   %A = icmp eq/ne i8 %x, 0
+///   %B = op i8 %x, %z
+///   %C = select i1 %A, i8 %B, i8 %y
+/// To
+///   %C = select i1 %A, i8 %z, i8 %y
+/// OP: binop with an identity constant
+/// TODO: support for non-commutative and FP opcodes
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
+  Value *Cond = Sel.getCondition();
+  Value *X, *Z;
+  Constant *C;
+  CmpInst::Predicate Pred;
+  if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
+      !ICmpInst::isEquality(Pred))
+    return nullptr;
+
+  // X == C holds on the true arm for 'eq' and on the false arm for 'ne'.
+  bool IsEq = Pred == ICmpInst::ICMP_EQ;
+  auto *BO =
+      dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
+  // TODO: support for undefs
+  if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
+      ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
+    Sel.setOperand(IsEq ? 1 : 2, Z);
+    return &Sel;
+  }
+  return nullptr;
+}
+
 /// This folds:
 ///  select (icmp eq (and X, C1)), TC, FC
 ///    iff C1 is a power 2 and the difference between TC and FC is a power-of-2.
@@ -1961,5 +1991,8 @@ if (Instruction *Select = foldSelectCmpXchg(SI)) return Select; + if (Instruction *Select = foldSelectBinOpIdentity(SI)) + return Select; + return nullptr; } Index: llvm/trunk/test/Transforms/InstCombine/select-binop-icmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/select-binop-icmp.ll +++ llvm/trunk/test/Transforms/InstCombine/select-binop-icmp.ll @@ -6,8 +6,7 @@ define i32 @select_xor_icmp(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_icmp( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 @@ -19,8 +18,7 @@ define i32 @select_xor_icmp2(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_icmp2( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp ne i32 %x, 0 @@ -32,8 +30,7 @@ define i32 @select_xor_icmp_meta(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_icmp_meta( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]], !prof !0 +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]], !prof !0 ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 @@ -45,8 +42,7 @@ define i32 @select_mul_icmp(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_mul_icmp( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1 -; CHECK-NEXT: [[B:%.*]] = mul i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 
[[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 1 @@ -58,8 +54,7 @@ define i32 @select_add_icmp(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_add_icmp( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = add i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 @@ -71,8 +66,7 @@ define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_or_icmp( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = or i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 @@ -84,8 +78,7 @@ define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], -1 -; CHECK-NEXT: [[B:%.*]] = and i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, -1 @@ -97,8 +90,7 @@ define <2 x i8> @select_xor_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; %A = icmp eq <2 x i8> %x, @@ -111,8 +103,7 @@ ; CHECK-LABEL: @select_xor_icmp_vec_use( ; CHECK-NEXT: [[A:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer ; CHECK-NEXT: call void @use(<2 x i1> [[A]]) -; CHECK-NEXT: [[B:%.*]] = xor 
<2 x i8> [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Y:%.*]], <2 x i8> [[B]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; %A = icmp ne <2 x i8> %x, @@ -125,8 +116,7 @@ define i32 @select_xor_inv_icmp(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_inv_icmp( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = xor i32 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp eq i32 %x, 0 @@ -138,8 +128,7 @@ define i32 @select_xor_inv_icmp2(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_xor_inv_icmp2( ; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[C]] ; %A = icmp ne i32 %x, 0 @@ -240,6 +229,7 @@ ret <2 x i8> %C } +; TODO: support for undefs, check for an identity constant does not handle them yet define <2 x i8> @select_xor_icmp_vec_bad_2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec_bad_2( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], Index: llvm/trunk/test/Transforms/SimplifyCFG/merge-cond-stores.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/merge-cond-stores.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/merge-cond-stores.ll @@ -5,15 +5,15 @@ define void @test_simple(i32* %p, i32 %a, i32 %b) { ; CHECK-LABEL: @test_simple( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[X1:%.*]] = icmp ne i32 [[A:%.*]], 0 ; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X2]], true -; 
CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[X2]], true +; CHECK-NEXT: [[TMP1:%.*]] = or i1 [[X1]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] ; CHECK: [[NOT_X2:%.*]] = xor i1 [[X2]], true -; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_X2]] to i32 -; CHECK-NEXT: store i32 [[DOT]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: br label [[TMP4]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[NOT_X2]] to i32 +; CHECK-NEXT: store i32 [[SPEC_SELECT]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[TMP3]] ; CHECK: ret void ; entry: @@ -44,8 +44,8 @@ ; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[X1]], [[X2]] ; CHECK-NEXT: br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]] -; CHECK: [[DOT:%.*]] = zext i1 [[X2]] to i32 -; CHECK-NEXT: store i32 [[DOT]], i32* [[P:%.*]], align 4 +; CHECK: [[SPEC_SELECT:%.*]] = zext i1 [[X2]] to i32 +; CHECK-NEXT: store i32 [[SPEC_SELECT]], i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[TMP2]] ; CHECK: ret void ; @@ -76,16 +76,16 @@ ; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[X4]], true -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[X4]], true +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] ; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]] ; CHECK: [[X3:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[NOT_X2:%.*]] = icmp ne i32 [[B]], 0 -; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_X2]] to i32 -; CHECK-NEXT: [[DOT_:%.*]] = select i1 [[X3]], i32 [[DOT]], i32 2 -; CHECK-NEXT: [[DOT__:%.*]] = select i1 [[X4]], i32 [[DOT_]], i32 3 
-; CHECK-NEXT: store i32 [[DOT__]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[X2:%.*]] = icmp ne i32 [[B]], 0 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[X2]] to i32 +; CHECK-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[X3]], i32 [[SPEC_SELECT]], i32 2 +; CHECK-NEXT: [[SPEC_SELECT2:%.*]] = select i1 [[X4]], i32 [[SPEC_SELECT1]], i32 3 +; CHECK-NEXT: store i32 [[SPEC_SELECT2]], i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[TMP6]] ; CHECK: ret void ; @@ -265,8 +265,7 @@ ; CHECK-LABEL: @test_diamond_simple( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[Z1:%.*]] = add i32 [[A]], [[B:%.*]] -; CHECK-NEXT: [[Z2:%.*]] = select i1 [[X1]], i32 [[Z1]], i32 0 +; CHECK-NEXT: [[Z2:%.*]] = select i1 [[X1]], i32 [[B:%.*]], i32 0 ; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B]], 0 ; CHECK-NEXT: [[Z3:%.*]] = sub i32 [[Z2]], [[B]] ; CHECK-NEXT: [[Z4:%.*]] = select i1 [[X2]], i32 [[Z3]], i32 3