Index: llvm/trunk/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp +++ llvm/trunk/lib/Analysis/ValueTracking.cpp @@ -51,6 +51,12 @@ static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); +// This optimization is known to cause performance regressions in some cases, +// keep it under a temporary flag for now. +static cl::opt<bool> +DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits", + cl::Hidden, cl::init(true)); + /// Returns the bitwidth of the given scalar or pointer type (if unknown returns /// 0). For vector types, returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -1300,9 +1306,46 @@ APInt KnownZero3(KnownZero), KnownOne3(KnownOne); computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q); - KnownZero = APInt::getLowBitsSet(BitWidth, - std::min(KnownZero2.countTrailingOnes(), - KnownZero3.countTrailingOnes())); + KnownZero = APInt::getLowBitsSet( + BitWidth, std::min(KnownZero2.countTrailingOnes(), + KnownZero3.countTrailingOnes())); + + if (DontImproveNonNegativePhiBits) + break; + + auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU); + if (OverflowOp && OverflowOp->hasNoSignedWrap()) { + // If initial value of recurrence is nonnegative, and we are adding + // a nonnegative number with nsw, the result can only be nonnegative + // or poison value regardless of the number of times we execute the + // add in phi recurrence. If initial value is negative and we are + // adding a negative number with nsw, the result can only be + // negative or poison value. Similar arguments apply to sub and mul. 
+ // + // (add non-negative, non-negative) --> non-negative + // (add negative, negative) --> negative + if (Opcode == Instruction::Add) { + if (KnownZero2.isNegative() && KnownZero3.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (KnownOne2.isNegative() && KnownOne3.isNegative()) + KnownOne.setBit(BitWidth - 1); + } + + // (sub nsw non-negative, negative) --> non-negative + // (sub nsw negative, non-negative) --> negative + else if (Opcode == Instruction::Sub && LL == I) { + if (KnownZero2.isNegative() && KnownOne3.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (KnownOne2.isNegative() && KnownZero3.isNegative()) + KnownOne.setBit(BitWidth - 1); + } + + // (mul nsw non-negative, non-negative) --> non-negative + else if (Opcode == Instruction::Mul && KnownZero2.isNegative() && + KnownZero3.isNegative()) + KnownZero.setBit(BitWidth - 1); + } + break; } } Index: llvm/trunk/test/Transforms/BBVectorize/loop1.ll =================================================================== --- llvm/trunk/test/Transforms/BBVectorize/loop1.ll +++ llvm/trunk/test/Transforms/BBVectorize/loop1.ll @@ -1,7 +1,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL +; RUN: opt < %s -dont-improve-non-negative-phi-bits=false -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL ; The second check covers the use of alias 
analysis (with loop unrolling). define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable { @@ -83,7 +83,7 @@ ; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11 ; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>* ; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8 -; CHECK-UNRL: %indvars.iv.next.1 = add nsw i64 %indvars.iv, 2 +; CHECK-UNRL: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2 ; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32 ; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10 ; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body