Index: llvm/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -3855,15 +3855,36 @@ m_ExtractValue<0>(m_Intrinsic( m_Specific(PN), m_ConstantInt(Step))))) return std::make_pair(IVInc, Step->getValue()); - // TODO: The result of the intrinsics above is two-compliment. However when - // IV inc is expressed as add or sub, iv.next is potentially a poison value. - // If it has nuw or nsw flags, we need to make sure that these flags are - // inferrable at the point of memory instruction. Otherwise we are replacing - // well-defined two-compliment computation with poison. Currently, to avoid - // potentially complex analysis needed to prove this, we reject such cases. - auto *OIVInc = dyn_cast(IVInc); - if (!OIVInc || OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap()) - return None; + if (auto *OIVInc = dyn_cast(IVInc)) + if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap()) { + bool ProvedNoPoison = false; + // If IVInc is represented as wrapping binary operator with no wrap + // flags, it may be poison. To legally replace the use of IV with use of + // IV.next, we need to prove that memory instuction will never execute + // with this poison. In general it's a hard thing to prove, but there is + // a widely-used case where the proof is trivial. If MemoryInstr is + // dominated by loop-exiting branch by icmp conditon involving IV.next, + // we can always assume that poison value will exit this loop at this + // point and we never reach MemoryInst. + for (auto *Node = DT.getNode(MemoryInst->getParent())->getIDom(); + L->contains(Node->getBlock()); Node = Node->getIDom()) { + BasicBlock *BB = Node->getBlock(); + BasicBlock *IfTrue, *IfFalse; + ICmpInst::Predicate Pred; + if (match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Specific(IVInc), m_Value()), + m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))) || + match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(), m_Specific(IVInc)), + m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) + if (!L->contains(IfTrue) || !L->contains(IfFalse)) { + ProvedNoPoison = true; + break; + } + } + if (!ProvedNoPoison) + return None; + } if (match(IVInc, m_Sub(m_Specific(PN), m_ConstantInt(Step)))) return std::make_pair(IVInc, -Step->getValue()); if (match(IVInc, m_Add(m_Specific(PN), m_ConstantInt(Step)))) Index: llvm/test/CodeGen/X86/overflowing-iv.ll =================================================================== --- llvm/test/CodeGen/X86/overflowing-iv.ll +++ llvm/test/CodeGen/X86/overflowing-iv.ll @@ -107,10 +107,10 @@ ; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[LEN_PLUS_1]] ; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4 +; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV_NEXT]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8* ; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]] -; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4 +; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -8 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32* ; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4 ; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]] @@ -204,10 +204,10 @@ ; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[LEN_PLUS_1]] ; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4 +; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV_NEXT]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8* ; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]] -; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4 +; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -8 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32* ; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4 ; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]