Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -309,16 +309,19 @@ /// 1. The icmp predicate is inverted /// 2. The select operands are reversed /// 3. The magnitude of C2 and C1 are flipped -static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, +static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy &Builder) { - const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); - if (!IC || !SI.getType()->isIntegerTy()) + if (!TrueVal->getType()->isIntOrIntVectorTy()) return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); + // If this is a vector select, we need a vector compare. + if (TrueVal->getType()->isVectorTy() != CmpLHS->getType()->isVectorTy()) + return nullptr; + Value *V; unsigned C1Log; bool IsEqualZero; @@ -367,8 +370,8 @@ bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal); bool NeedShift = C1Log != C2Log; - bool NeedZExtTrunc = Y->getType()->getIntegerBitWidth() != - V->getType()->getIntegerBitWidth(); + bool NeedZExtTrunc = Y->getType()->getScalarSizeInBits() != + V->getType()->getScalarSizeInBits(); // Make sure we don't create more instructions than we save. Value *Or = OrOnFalseVal ? 
FalseVal : TrueVal; @@ -818,7 +821,7 @@ } } - if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) + if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) Index: test/Transforms/InstCombine/select-with-bitwise-ops.ll =================================================================== --- test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -17,6 +17,20 @@ ret i32 %select } +define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec( +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 2, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %and = and <2 x i32> %x, <i32 1, i32 1> + %cmp = icmp eq <2 x i32> %and, zeroinitializer + %or = or <2 x i32> %y, <i32 2, i32 2> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8( ; CHECK-NEXT: [[AND:%.*]] = lshr i32 %x, 2 @@ -31,6 +45,20 @@ ret i32 %select } +define <2 x i32> @select_icmp_eq_and_32_0_or_8_vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8_vec( +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 8, i32 8> +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %and = and <2 x i32> %x, <i32 32, i32 32> + %cmp = icmp eq <2 x i32> %and, zeroinitializer + %or = or <2 x i32> %y, <i32 8, i32 8> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096( ; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 @@ -45,6 
+73,20 @@ ret i32 %select } +define <2 x i32> @select_icmp_ne_0_and_4096_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096_vec( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 4096, i32 4096> +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], <i32 4096, i32 4096> +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %and = and <2 x i32> %x, <i32 4096, i32 4096> + %cmp = icmp ne <2 x i32> zeroinitializer, %and + %or = or <2 x i32> %y, <i32 4096, i32 4096> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096( ; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096 @@ -58,6 +100,19 @@ ret i32 %select } +define <2 x i32> @select_icmp_eq_and_4096_0_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096_vec( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 4096, i32 4096> +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[AND]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %and = and <2 x i32> %x, <i32 4096, i32 4096> + %cmp = icmp eq <2 x i32> %and, zeroinitializer + %or = or <2 x i32> %y, <i32 4096, i32 4096> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1( ; CHECK-NEXT: [[X_TR:%.*]] = trunc i64 %x to i32 @@ -72,6 +127,20 @@ ret i32 %select } +define <2 x i32> @select_icmp_eq_0_and_1_or_1_vec(<2 x i64> %x, <2 x i32> %y) { +; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1_vec( +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1> +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %and = and <2 x i64> %x, <i64 1, i64 1> + %cmp = icmp eq <2 x i64> %and, zeroinitializer + %or = or <2 x i32> %y, <i32 1, i32 1> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select 
+} + define i32 @select_icmp_ne_0_and_4096_or_32(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_32( ; CHECK-NEXT: [[AND:%.*]] = lshr i32 %x, 7 @@ -102,6 +171,21 @@ ret i32 %select } +define <2 x i32> @select_icmp_ne_0_and_32_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096_vec( +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 7, i32 7> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 4096, i32 4096> +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], <i32 4096, i32 4096> +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %and = and <2 x i32> %x, <i32 32, i32 32> + %cmp = icmp ne <2 x i32> zeroinitializer, %and + %or = or <2 x i32> %y, <i32 4096, i32 4096> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8( ; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 1073741824 @@ -218,6 +302,21 @@ ret i64 %or.y } +define <2 x i64> @select_icmp_x_and_8_ne_0_y_or_8_vec(<2 x i32> %x, <2 x i64> %y) { +; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8_vec( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 8, i32 8> +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], <i32 8, i32 8> +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %and = and <2 x i32> %x, <i32 8, i32 8> + %cmp = icmp eq <2 x i32> %and, zeroinitializer + %or = or <2 x i64> %y, <i64 8, i64 8> + %or.y = select <2 x i1> %cmp, <2 x i64> %or, <2 x i64> %y + ret <2 x i64> %or.y +} + define i32 @select_icmp_and_2147483648_ne_0_xor_2147483648(i32 %x) { ; CHECK-LABEL: @select_icmp_and_2147483648_ne_0_xor_2147483648( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, 2147483647 @@ -366,6 +465,20 @@ ret i32 %select } +define <2 x i32> @test68vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @test68vec( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 6, i32 6> +; CHECK-NEXT: 
[[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 2, i32 2> +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %and = and <2 x i32> %x, <i32 128, i32 128> + %cmp = icmp eq <2 x i32> %and, zeroinitializer + %or = or <2 x i32> %y, <i32 2, i32 2> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + define i32 @test69(i32 %x, i32 %y) { ; CHECK-LABEL: @test69( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 @@ -381,6 +494,21 @@ ret i32 %select } +define <2 x i32> @test69vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @test69vec( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 6, i32 6> +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 2, i32 2> +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], <i32 2, i32 2> +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i32> [[TMP4]] +; + %and = and <2 x i32> %x, <i32 128, i32 128> + %cmp = icmp ne <2 x i32> %and, zeroinitializer + %or = or <2 x i32> %y, <i32 2, i32 2> + %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + ; TODO: we should be able to remove this select define i8 @test70(i8 %x, i8 %y) { ; CHECK-LABEL: @test70(