Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2457,6 +2457,45 @@
   return nullptr;
 }
 
+Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
+                                                   BitCastInst *Bitcast,
+                                                   const APInt &C) {
+  // Folding: icmp <pred> iN X, C
+  //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
+  //    and C is a splat of a K-bit pattern
+  //    and SC is a constant vector = <C', C', C', ..., C'>
+  // Into:
+  //   %E = extractelement <M x iK> %vec, i32 C'
+  //   icmp <pred> iK %E, trunc(C)
+  if (!Bitcast->getType()->isIntegerTy() ||
+      !Bitcast->getSrcTy()->isIntOrIntVectorTy())
+    return nullptr;
+
+  Value *BCIOp = Bitcast->getOperand(0);
+  Value *Vec = nullptr;     // 1st vector arg of the shufflevector
+  Constant *Mask = nullptr; // Mask arg of the shufflevector
+  if (match(BCIOp,
+            m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) {
+    // Check whether every element of Mask is the same constant
+    if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) {
+      auto *VecTy = cast<VectorType>(BCIOp->getType());
+      auto *EltTy = cast<IntegerType>(VecTy->getElementType());
+      auto Pred = Cmp.getPredicate();
+      if (C.isSplat(EltTy->getBitWidth())) {
+        // Fold the icmp based on the value of C
+        // If C is M copies of an iK sized bit pattern,
+        // then:
+        //   =>  %E = extractelement <N x iK> %vec, i32 Elem
+        //       icmp <pred> iK %SplatVal, <pattern>
+        Value *Extract = Builder.CreateExtractElement(Vec, Elem);
+        Value *NewC = ConstantInt::get(EltTy, C.trunc(EltTy->getBitWidth()));
+        return new ICmpInst(Pred, Extract, NewC);
+      }
+    }
+  }
+  return nullptr;
+}
+
 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
 /// where X is some kind of instruction.
Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
@@ -2531,6 +2570,11 @@
     return I;
   }
 
+  if (auto *BCI = dyn_cast<BitCastInst>(Cmp.getOperand(0))) {
+    if (Instruction *I = foldICmpBitCastConstant(Cmp, BCI, *C))
+      return I;
+  }
+
   if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
     return I;
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -734,6 +734,8 @@
   Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
                                       ConstantInt *C);
+  Instruction *foldICmpBitCastConstant(ICmpInst &Cmp, BitCastInst *Bitcast,
+                                       const APInt &C);
   Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
                                      const APInt &C);
   Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
Index: llvm/trunk/test/Transforms/InstCombine/icmp-bc-vec.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/icmp-bc-vec.ll
+++ llvm/trunk/test/Transforms/InstCombine/icmp-bc-vec.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Tests to verify proper functioning of the icmp folding implemented in
+;  InstCombiner::foldICmpBitCastConstant
+; Specifically, folding:
+;   icmp <pred> iN X, C
+;    where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
+;      and C is a splat of a K-bit pattern
+;      and SC is a constant vector = <C', C', C', ..., C'>
+;   Into:
+;    %E = extractelement <M x iK> %vec, i32 C'
+;    icmp <pred> iK %E, trunc(C)
+
+define i1 @test_i1_0(i1 %val) {
+; CHECK-LABEL: @test_i1_0(
+; CHECK-NEXT:    [[COND:%.*]] = xor i1 [[VAL:%.*]], true
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+  %insvec = insertelement <4 x i1> undef, i1 %val, i32 0
+  %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
+  %cast = bitcast <4 x i1> %vec to i4
+  %cond = icmp eq i4 %cast, 0
+  ret i1 %cond
+}
+
+define i1 @test_i1_0_2(i1 %val) {
+; CHECK-LABEL: @test_i1_0_2(
+; CHECK-NEXT:    [[COND:%.*]] = xor i1 [[VAL:%.*]], true
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+  %insvec = insertelement <4 x i1> undef, i1 %val, i32 2
+  %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  %cast = bitcast <4 x i1> %vec to i4
+  %cond = icmp eq i4 %cast, 0
+  ret i1 %cond
+}
+
+define i1 @test_i1_m1(i1 %val) {
+; CHECK-LABEL: @test_i1_m1(
+; CHECK-NEXT:    ret i1 [[VAL:%.*]]
+;
+  %insvec = insertelement <4 x i1> undef, i1 %val, i32 0
+  %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
+  %cast = bitcast <4 x i1> %vec to i4
+  %cond = icmp eq i4 %cast, -1
+  ret i1 %cond
+}
+
+define i1 @test_i8_pattern(i8 %val) {
+; CHECK-LABEL: @test_i8_pattern(
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+  %insvec = insertelement <4 x i8> undef, i8 %val, i32 0
+  %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
+  %cast = bitcast <4 x i8> %vec to i32
+  %cond = icmp eq i32 %cast, 1212696648
+  ret i1 %cond
+}
+
+define i1 @test_i8_pattern_2(i8 %val) {
+; CHECK-LABEL: @test_i8_pattern_2(
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+  %insvec = insertelement <4 x i8> undef, i8 %val, i32 2
+  %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  %cast = bitcast <4 x i8> %vec to i32
+  %cond = icmp eq i32 %cast, 1212696648
+  ret i1 %cond
+}
+
+; Make sure we don't try to fold if the shufflemask has differing element values
+define i1 @test_i8_pattern_3(<4 x i8> %invec) {
+; CHECK-LABEL: @test_i8_pattern_3(
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <4 x i8> [[INVEC:%.*]], <4 x i8> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696648
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+  %vec = shufflevector <4 x i8> %invec, <4 x i8> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+  %cast = bitcast <4 x i8> %vec to i32
+  %cond = icmp eq i32 %cast, 1212696648
+  ret i1 %cond
+}
+
+; Make sure we don't try to fold if the compared-to constant isn't a splatted value
+define i1 @test_i8_nopattern(i8 %val) {
+; CHECK-LABEL: @test_i8_nopattern(
+; CHECK-NEXT:    [[INSVEC:%.*]] = insertelement <4 x i8> undef, i8 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <4 x i8> [[INSVEC]], <4 x i8> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696647
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+  %insvec = insertelement <4 x i8> undef, i8 %val, i32 0
+  %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
+  %cast = bitcast <4 x i8> %vec to i32
+  %cond = icmp eq i32 %cast, 1212696647
+  ret i1 %cond
+}
+
+; Verify that we fold more than just the eq predicate
+define i1 @test_i8_ult_pattern(i8 %val) {
+; CHECK-LABEL: @test_i8_ult_pattern(
+; CHECK-NEXT:    [[COND:%.*]] = icmp ult i8 [[VAL:%.*]], 72
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+  %insvec = insertelement <4 x i8> undef, i8 %val, i32 0
+  %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
+  %cast = bitcast <4 x i8> %vec to i32
+  %cond = icmp ult i32 %cast, 1212696648
+  ret i1 %cond
+}
+
+define i1 @extending_shuffle_with_weird_types(<2 x i9> %v) {
+; CHECK-LABEL: @extending_shuffle_with_weird_types(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i9> [[V:%.*]], i32 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i9 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %splat = shufflevector <2 x i9> %v, <2 x i9> undef, <3 x i32> zeroinitializer
+  %cast = bitcast <3 x i9> %splat to i27
+  %cmp = icmp slt i27 %cast, 262657 ; 0x040201
+  ret i1 %cmp
+}