Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -694,27 +695,31 @@
   // FIXME: Type and constness constraints could be lifted, but we have to
   //        watch code size carefully. We should consider xor instead of
   //        sub/add when we decide to do that.
-  if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
-    if (TrueVal->getType() == Ty) {
-      if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
-        ConstantInt *C1 = nullptr, *C2 = nullptr;
-        if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) {
-          C1 = dyn_cast<ConstantInt>(TrueVal);
-          C2 = dyn_cast<ConstantInt>(FalseVal);
-        } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) {
-          C1 = dyn_cast<ConstantInt>(FalseVal);
-          C2 = dyn_cast<ConstantInt>(TrueVal);
-        }
-        if (C1 && C2) {
+  if (CmpLHS->getType()->isIntOrIntVectorTy() &&
+      CmpLHS->getType() == TrueVal->getType()) {
+    const APInt *C1, *C2;
+    if (match(TrueVal, m_APInt(C1)) && match(FalseVal, m_APInt(C2))) {
+      ICmpInst::Predicate Pred = ICI->getPredicate();
+      Value *X;
+      APInt Mask;
+      if (decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) {
+        if (Mask.isSignMask()) {
+          assert(X == CmpLHS && "Expected to use the compare input directly");
+          assert(ICmpInst::isEquality(Pred) && "Expected equality predicate");
+
+          if (Pred == ICmpInst::ICMP_NE)
+            std::swap(C1, C2);
+
           // This shift results in either -1 or 0.
-          Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1);
+          Value *AShr = Builder.CreateAShr(X, Mask.getBitWidth() - 1);
 
           // Check if we can express the operation with a single or.
-          if (C2->isMinusOne())
-            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1));
+          if (C2->isAllOnesValue())
+            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, *C1));
 
-          Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue());
-          return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1));
+          Value *And = Builder.CreateAnd(AShr, *C2 - *C1);
+          return replaceInstUsesWith(SI, Builder.CreateAdd(And,
+                                         ConstantInt::get(And->getType(), *C1)));
         }
       }
     }
Index: llvm/trunk/test/Transforms/InstCombine/select.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/select.ll
+++ llvm/trunk/test/Transforms/InstCombine/select.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; PR1822
@@ -649,6 +650,33 @@
 ; CHECK: ret
 }
 
+define <2 x i32> @test35vec(<2 x i32> %x) {
+; CHECK-LABEL: @test35vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw <2 x i32> [[TMP2]], <i32 60, i32 60>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %cmp = icmp sge <2 x i32> %x, <i32 0, i32 0>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+  ret <2 x i32> %cond
+}
+
+; Make sure we can still perform this optimization with a truncate present
+define i32 @test35_with_trunc(i64 %x) {
+; CHECK-LABEL: @test35_with_trunc(
+; CHECK-NEXT:    [[X1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X1]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 40
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 60
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %x1 = trunc i64 %x to i32
+  %cmp = icmp sge i32 %x1, 0
+  %cond = select i1 %cmp, i32 60, i32 100
+  ret i32 %cond
+}
+
 define i32 @test36(i32 %x) {
   %cmp = icmp slt i32 %x, 0
   %cond = select i1 %cmp, i32 60, i32 100
@@ -660,6 +688,18 @@
 ; CHECK: ret
 }
 
+define <2 x i32> @test36vec(<2 x i32> %x) {
+; CHECK-LABEL: @test36vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 -40, i32 -40>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 100, i32 100>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %cmp = icmp slt <2 x i32> %x, <i32 0, i32 0>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+  ret <2 x i32> %cond
+}
+
 define i32 @test37(i32 %x) {
   %cmp = icmp sgt i32 %x, -1
   %cond = select i1 %cmp, i32 1, i32 -1
@@ -670,6 +710,17 @@
 ; CHECK: ret
 }
 
+define <2 x i32> @test37vec(<2 x i32> %x) {
+; CHECK-LABEL: @test37vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 -1, i32 -1>
+  ret <2 x i32> %cond
+}
+
 define i1 @test38(i1 %cond) {
   %zero = alloca i32
   %one = alloca i32