diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2830,8 +2830,7 @@ return nullptr; } -static Instruction *foldICmpBitCast(ICmpInst &Cmp, - InstCombiner::BuilderTy &Builder) { +Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) { auto *Bitcast = dyn_cast(Cmp.getOperand(0)); if (!Bitcast) return nullptr; @@ -2917,6 +2916,26 @@ return new ICmpInst(Pred, BCSrcOp, Op1); } + const APInt *C; + if (!match(Cmp.getOperand(1), m_APInt(C)) || + !Bitcast->getType()->isIntegerTy() || + !Bitcast->getSrcTy()->isIntOrIntVectorTy()) + return nullptr; + + // If this is checking if all elements of a vector compare are set or not, + // invert the casted vector equality compare and test if all compare + // elements are clear or not. Compare against zero is generally easier for + // analysis and codegen. + // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0 + // Example: are all elements equal? --> are zero elements not equal? + // TODO: Try harder to reduce compare of 2 freely invertible operands? + if (Cmp.isEquality() && C->isAllOnesValue() && Bitcast->hasOneUse() && + isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) { + Type *ScalarTy = Bitcast->getType(); + Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), ScalarTy); + return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy)); + } + // Folding: icmp iN X, C // where X = bitcast (shufflevector %vec, undef, SC)) to iN // and C is a splat of a K-bit pattern @@ -2924,12 +2943,6 @@ // Into: // %E = extractelement %vec, i32 C' // icmp iK %E, trunc(C) - const APInt *C; - if (!match(Cmp.getOperand(1), m_APInt(C)) || - !Bitcast->getType()->isIntegerTy() || - !Bitcast->getSrcTy()->isIntOrIntVectorTy()) - return nullptr; - Value *Vec; ArrayRef Mask; if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) { @@ -5777,7 +5790,7 @@ return New; } - if (Instruction *Res = foldICmpBitCast(I, Builder)) + if (Instruction *Res = foldICmpBitCast(I)) return Res; // TODO: Hoist this above the min/max bailout. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -701,6 +701,7 @@ const APInt &C); Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II, const APInt &C); + Instruction *foldICmpBitCast(ICmpInst &Cmp); // Helpers of visitSelectInst(). Instruction *foldSelectExtConst(SelectInst &Sel); diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll --- a/llvm/test/Transforms/InstCombine/icmp-vec.ll +++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll @@ -402,9 +402,9 @@ define i1 @eq_cast_eq-1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @eq_cast_eq-1( -; CHECK-NEXT: [[IC:%.*]] = icmp eq <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2 -; CHECK-NEXT: [[R:%.*]] = icmp eq i2 [[B]], -1 +; CHECK-NEXT: [[IC:%.*]] = icmp ne <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i1> [[IC]] to i2 +; CHECK-NEXT: [[R:%.*]] = icmp eq i2 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %ic = icmp eq <2 x i4> %x, %y @@ -415,9 +415,9 @@ define i1 @ne_cast_eq-1(<3 x i7> %x, <3 x i7> %y) { ; CHECK-LABEL: @ne_cast_eq-1( -; CHECK-NEXT: [[IC:%.*]] = icmp ne <3 x i7> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i1> [[IC]] to i3 -; CHECK-NEXT: [[R:%.*]] = icmp eq i3 [[B]], -1 +; CHECK-NEXT: [[IC:%.*]] = icmp eq <3 x i7> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i1> [[IC]] to i3 +; CHECK-NEXT: [[R:%.*]] = icmp eq i3 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %ic = icmp ne <3 x i7> %x, %y @@ -428,9 +428,9 @@ define i1 @eq_cast_ne-1(<2 x i7> %x, <2 x i7> %y) { ; CHECK-LABEL: @eq_cast_ne-1( -; CHECK-NEXT: [[IC:%.*]] = icmp eq <2 x i7> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2 -; CHECK-NEXT: [[R:%.*]] = icmp ne i2 [[B]], -1 +; CHECK-NEXT: [[IC:%.*]] = icmp ne <2 x i7> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i1> [[IC]] to i2 +; CHECK-NEXT: [[R:%.*]] = icmp ne i2 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %ic = icmp eq <2 x i7> %x, %y @@ -441,9 +441,9 @@ define i1 @ne_cast_ne-1(<3 x i5> %x, <3 x i5> %y) { ; CHECK-LABEL: @ne_cast_ne-1( -; CHECK-NEXT: [[IC:%.*]] = icmp ne <3 x i5> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i1> [[IC]] to i3 -; CHECK-NEXT: [[R:%.*]] = icmp ne i3 [[B]], -1 +; CHECK-NEXT: [[IC:%.*]] = icmp eq <3 x i5> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i1> [[IC]] to i3 +; CHECK-NEXT: [[R:%.*]] = icmp ne i3 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %ic = icmp ne <3 x i5> %x, %y @@ -454,9 +454,9 @@ define i1 @ugt_cast_eq-1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @ugt_cast_eq-1( -; CHECK-NEXT: [[IC:%.*]] = icmp ugt <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2 -; CHECK-NEXT: [[R:%.*]] = icmp eq i2 [[B]], -1 +; CHECK-NEXT: [[IC:%.*]] = icmp ule <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i1> [[IC]] to i2 +; CHECK-NEXT: [[R:%.*]] = icmp eq i2 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %ic = icmp ugt <2 x i4> %x, %y @@ -467,9 +467,9 @@ define i1 @slt_cast_ne-1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @slt_cast_ne-1( -; CHECK-NEXT: [[IC:%.*]] = icmp slt <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2 -; CHECK-NEXT: [[R:%.*]] = icmp ne i2 [[B]], -1 +; CHECK-NEXT: [[IC:%.*]] = icmp sge <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i1> [[IC]] to i2 +; CHECK-NEXT: [[R:%.*]] = icmp ne i2 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %ic = icmp slt <2 x i4> %x, %y @@ -480,9 +480,9 @@ define i1 @ueq_cast_eq-1(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: @ueq_cast_eq-1( -; CHECK-NEXT: [[FC:%.*]] = fcmp ueq <3 x float> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i1> [[FC]] to i3 -; CHECK-NEXT: [[R:%.*]] = icmp eq i3 [[B]], -1 +; CHECK-NEXT: [[FC:%.*]] = fcmp one <3 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i1> [[FC]] to i3 +; CHECK-NEXT: [[R:%.*]] = icmp eq i3 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %fc = fcmp ueq <3 x float> %x, %y @@ -493,9 +493,8 @@ define i1 @not_cast_ne-1(<3 x i1> %x) { ; CHECK-LABEL: @not_cast_ne-1( -; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i1> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i1> [[NOT]] to i3 -; CHECK-NEXT: [[R:%.*]] = icmp ne i3 [[B]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i1> [[X:%.*]] to i3 +; CHECK-NEXT: [[R:%.*]] = icmp ne i3 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %not = xor <3 x i1> %x, @@ -508,8 +507,8 @@ ; CHECK-LABEL: @not_cast_ne-1_uses( ; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i2> [[X:%.*]], ; CHECK-NEXT: store <3 x i2> [[NOT]], <3 x i2>* [[P:%.*]], align 4 -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i2> [[NOT]] to i6 -; CHECK-NEXT: [[R:%.*]] = icmp ne i6 [[B]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i2> [[X]] to i6 +; CHECK-NEXT: [[R:%.*]] = icmp ne i6 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %not = xor <3 x i2> %x, @@ -519,6 +518,8 @@ ret i1 %r } +; negative test - need equality pred on 2nd cmp + define i1 @eq_cast_sgt-1(<3 x i4> %x, <3 x i4> %y) { ; CHECK-LABEL: @eq_cast_sgt-1( ; CHECK-NEXT: [[IC:%.*]] = icmp eq <3 x i4> [[X:%.*]], [[Y:%.*]] @@ -532,6 +533,8 @@ ret i1 %r } +; negative test - need all-ones constant on 2nd cmp + define i1 @eq_cast_eq1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @eq_cast_eq1( ; CHECK-NEXT: [[IC:%.*]] = icmp eq <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -545,6 +548,8 @@ ret i1 %r } +; negative test - extra use + define i1 @eq_cast_eq-1_use1(<2 x i4> %x, <2 x i4> %y, <2 x i1>* %p) { ; CHECK-LABEL: @eq_cast_eq-1_use1( ; CHECK-NEXT: [[IC:%.*]] = icmp sgt <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -560,6 +565,8 @@ ret i1 %r } +; negative test - extra use + define i1 @eq_cast_eq-1_use2(<2 x i4> %x, <2 x i4> %y, i2* %p) { ; CHECK-LABEL: @eq_cast_eq-1_use2( ; CHECK-NEXT: [[IC:%.*]] = icmp sgt <2 x i4> [[X:%.*]], [[Y:%.*]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -8,14 +8,14 @@ ; CHECK-LABEL: @test_merge_allof_v4sf( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 @@ -176,14 +176,14 @@ ; CHECK-LABEL: @test_separate_allof_v4sf( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 @@ -350,18 +350,18 @@ ; CHECK-LABEL: @test_merge_allof_v4si( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[RETURN]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float ; CHECK-NEXT: br label [[RETURN]] @@ -507,16 +507,16 @@ ; CHECK-LABEL: @test_separate_allof_v4si( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], i32 0, i32 [[ADD]] ; CHECK-NEXT: br label [[RETURN]]