Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -128,7 +128,7 @@ cl::init(false), cl::Hidden); /// Currently, the generation "phi of ops" can result in correctness issues. -static cl::opt EnablePhiOfOps("enable-phi-of-ops", cl::init(false), +static cl::opt EnablePhiOfOps("enable-phi-of-ops", cl::init(true), cl::Hidden); //===----------------------------------------------------------------------===// @@ -475,6 +475,7 @@ // These mappings just store various data that would normally be part of the // IR. DenseSet PHINodeUses; + DenseMap OpSafeToSimplify; // Map a temporary instruction we created to a parent block. DenseMap TempToBlock; // Map between the already in-program instructions and the temporary phis we @@ -595,7 +596,7 @@ private: // Expression handling. - const Expression *createExpression(Instruction *) const; + const Expression *createExpression(Instruction *, bool) const; const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *) const; PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, @@ -643,6 +644,13 @@ void initializeCongruenceClasses(Function &F); const Expression *makePossiblePhiOfOps(Instruction *, SmallPtrSetImpl &); + Value *findLeaderForInst(Instruction *ValueOp, + SmallPtrSetImpl &Visited, + bool SafeToSimplify, MemoryAccess *MemAccess, + Instruction *OrigInst, BasicBlock *PredBB); + + bool OpIsSafeToSimplify(Value *Op, Instruction *OrigInst, + const BasicBlock *PHIBlock); void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); void removePhiOfOps(Instruction *I, PHINode *PHITemp); @@ -703,8 +711,7 @@ void replaceInstruction(Instruction *, Value *); void markInstructionForDeletion(Instruction *); void deleteInstructionsInBlock(BasicBlock *); - Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const; - + Value *findPHIOfOpsLeader(const 
Expression *E, const BasicBlock *BB) const; // New instruction creation. void handleNewInstruction(Instruction *){}; @@ -982,7 +989,12 @@ return nullptr; } -const Expression *NewGVN::createExpression(Instruction *I) const { +// Create a value expression from the instruction I, replacing operands with +// their leaders. If SafeToSimplify is true, we use the instruction simplifier +// to try to simplify the resulting expression. It is not always safe to +// simplify, see makePossiblePhiOfOps for why. +const Expression *NewGVN::createExpression(Instruction *I, + bool SafeToSimplify) const { auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); bool AllConstant = setBasicExpressionInfo(I, E); @@ -996,7 +1008,8 @@ if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))) E->swapOperands(0, 1); } - + if (!SafeToSimplify) + return E; // Perform simplification. // TODO: Right now we only check to see if we get a constant result. // We may get a less than constant, but still better, result for @@ -1834,7 +1847,7 @@ } } // Create expression will take care of simplifyCmpInst - return createExpression(I); + return createExpression(I, true); } // Return true if V is a value that will always be available (IE can @@ -1876,7 +1889,7 @@ E = performSymbolicLoadEvaluation(I); break; case Instruction::BitCast: { - E = createExpression(I); + E = createExpression(I, true); } break; case Instruction::ICmp: case Instruction::FCmp: { @@ -1916,7 +1929,7 @@ case Instruction::InsertElement: case Instruction::ShuffleVector: case Instruction::GetElementPtr: - E = createExpression(I); + E = createExpression(I, true); break; default: return nullptr; @@ -2356,7 +2369,7 @@ Value *CondEvaluated = findConditionEquivalence(Cond); if (!CondEvaluated) { if (auto *I = dyn_cast(Cond)) { - const Expression *E = createExpression(I); + const Expression *E = createExpression(I, true); if (const auto *CE = dyn_cast(E)) { CondEvaluated = CE->getConstantValue(); } @@ -2459,6 +2472,118 @@
isa(I); } +namespace llvm { +// This GraphTraits template will walk the use-def graph. +template <> struct GraphTraits { + using NodeRef = Value *; + using ChildIteratorType = User::value_op_iterator; + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { + if (!isa(N)) + return User::value_op_iterator(nullptr); + return cast(N)->value_op_begin(); + } + static ChildIteratorType child_end(NodeRef N) { + if (!isa(N)) + return User::value_op_iterator(nullptr); + return cast(N)->value_op_end(); + } +}; +} + +// Return true if this operand will be safe to pass to the simplifier if used as +// part of a phi of ops instruction. +// +// The reason some operands are unsafe is that we are not trying to recursively +// translate everything back through phi nodes. We actually expect some lookups +// of expressions to fail. However, the simplifier itself sometimes tries to +// look through phi nodes as it simplifies. If we have translated one operand +// through a phi node, and another not, the simplifier will now not realize it +// is looking at things from two different loop iterations, and get wrong +// answers. An example of this is pr33185. +bool NewGVN::OpIsSafeToSimplify(Value *Op, Instruction *OrigInst, + const BasicBlock *PHIBlock) { + if (!isa(Op)) + return true; + auto OISIt = OpSafeToSimplify.find(Op); + if (OISIt != OpSafeToSimplify.end()) + return OISIt->second; + + // The real answer is to DFS the operand and see if it depends on a phi in + // PHIBlock. + for (auto DFI = df_begin(Op), DFE = df_end(Op); DFI != DFE;) { + Value *Curr = *DFI; + // See if we already know the answer for this node. + auto OISIt = OpSafeToSimplify.find(Curr); + if (OISIt != OpSafeToSimplify.end()) { + if (!OISIt->second) { + OpSafeToSimplify.insert({Curr, false}); + return false; + } + + DFI.skipChildren(); + continue; + } + + // If it's not an instruction we are safe. 
+ if (!isa(Curr)) { + DFI.skipChildren(); + continue; + } + + // Keep walking until we either dominate the phi block, or hit a phi, or run + // out of things to check. Note the order of checks is important. PHIs that + // strictly dominate our phi block are okay. + Instruction *CurrInst = cast(Curr); + if (DT->properlyDominates(getBlockForValue(CurrInst), PHIBlock)) { + OpSafeToSimplify.insert({Curr, true}); + DFI.skipChildren(); + continue; + } + if (isa(Curr)) { + OpSafeToSimplify.insert({Curr, false}); + return false; + } + + ++DFI; + } + OpSafeToSimplify.insert({Op, true}); + return true; +} + +Value *NewGVN::findLeaderForInst(Instruction *ValueOp, + SmallPtrSetImpl &Visited, + bool SafeToSimplify, MemoryAccess *MemAccess, + Instruction *OrigInst, BasicBlock *PredBB) { + unsigned IDFSNum = InstrToDFSNum(OrigInst); + // Make sure it's marked as a temporary instruction. + AllTempInstructions.insert(ValueOp); + // and make sure anything that tries to add its DFS number is + // redirected to the instruction we are making a phi of ops + // for. + TempToBlock.insert({ValueOp, PredBB}); + InstrDFS.insert({ValueOp, IDFSNum}); + + const Expression *E = SafeToSimplify + ? performSymbolicEvaluation(ValueOp, Visited) + : createExpression(ValueOp, false); + InstrDFS.erase(ValueOp); + AllTempInstructions.erase(ValueOp); + TempToBlock.erase(ValueOp); + if (MemAccess) + TempToMemory.erase(ValueOp); + if (!E) + return nullptr; + auto *FoundVal = findPHIOfOpsLeader(E, PredBB); + if (!FoundVal || FoundVal == OrigInst) { + ExpressionToPhiOfOps[E].insert(OrigInst); + return nullptr; + } + if (auto *SI = dyn_cast(FoundVal)) + FoundVal = SI->getValueOperand(); + return FoundVal; +} + // When we see an instruction that is an op of phis, generate the equivalent phi // of ops form.
const Expression * @@ -2476,7 +2601,6 @@ if (!isCycleFree(I)) return nullptr; - unsigned IDFSNum = InstrToDFSNum(I); SmallPtrSet ProcessedPHIs; // TODO: We don't do phi translation on memory accesses because it's // complicated. For a load, we'd need to be able to simulate a new memoryuse, @@ -2491,16 +2615,6 @@ // Convert op of phis to phi of ops for (auto &Op : I->operands()) { - // TODO: We can't handle expressions that must be recursively translated - // IE - // a = phi (b, c) - // f = use a - // g = f + phi of something - // To properly make a phi of ops for g, we'd have to properly translate and - // use the instruction for f. We should add this by splitting out the - // instruction creation we do below. - if (isa(Op) && PHINodeUses.count(cast(Op))) - return nullptr; if (!isa(Op)) continue; auto *OpPHI = cast(Op); @@ -2521,36 +2635,23 @@ Instruction *ValueOp = I->clone(); if (MemAccess) TempToMemory.insert({ValueOp, MemAccess}); - + bool SafeToSimplify = true; for (auto &Op : ValueOp->operands()) { Op = Op->DoPHITranslation(PHIBlock, PredBB); // When this operand changes, it could change whether there is a // leader for us or not. addAdditionalUsers(Op, I); + SafeToSimplify = + SafeToSimplify && OpIsSafeToSimplify(Op, I, PHIBlock); } - // Make sure it's marked as a temporary instruction. - AllTempInstructions.insert(ValueOp); - // and make sure anything that tries to add it's DFS number is - // redirected to the instruction we are making a phi of ops - // for. - TempToBlock.insert({ValueOp, PredBB}); - InstrDFS.insert({ValueOp, IDFSNum}); - const Expression *E = performSymbolicEvaluation(ValueOp, Visited); - InstrDFS.erase(ValueOp); - AllTempInstructions.erase(ValueOp); + FoundVal = ConstantFoldInstruction(ValueOp, DL, TLI); + FoundVal = FoundVal + ? 
FoundVal + : findLeaderForInst(ValueOp, Visited, SafeToSimplify, + MemAccess, I, PredBB); ValueOp->deleteValue(); - TempToBlock.erase(ValueOp); - if (MemAccess) - TempToMemory.erase(ValueOp); - if (!E) - return nullptr; - FoundVal = findPhiOfOpsLeader(E, PredBB); - if (!FoundVal) { - ExpressionToPhiOfOps[E].insert(I); + if (!FoundVal) return nullptr; - } - if (auto *SI = dyn_cast(FoundVal)) - FoundVal = SI->getValueOperand(); } else { DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " << getBlockName(PredBB) @@ -2565,7 +2666,8 @@ auto *ValuePHI = RealToTemp.lookup(I); bool NewPHI = false; if (!ValuePHI) { - ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands()); + ValuePHI = + PHINode::Create(I->getType(), OpPHI->getNumOperands(), "phiofops"); addPhiOfOps(ValuePHI, PHIBlock, I); NewPHI = true; NumGVNPHIOfOpsCreated++; @@ -2690,6 +2792,8 @@ TempToBlock.clear(); TempToMemory.clear(); PHIOfOpsPHIs.clear(); + PHINodeUses.clear(); + OpSafeToSimplify.clear(); ReachableBlocks.clear(); ReachableEdges.clear(); #ifndef NDEBUG @@ -3520,8 +3624,9 @@ } // Given a value and a basic block we are trying to see if it is available in, -// see if the value has a leader available in that block. -Value *NewGVN::findPhiOfOpsLeader(const Expression *E, +// see if the value has a leader available in that block, and that will dominate +// OrigInst. +Value *NewGVN::findPHIOfOpsLeader(const Expression *E, const BasicBlock *BB) const { // It would already be constant if we could make it constant if (auto *CE = dyn_cast(E)) @@ -3541,11 +3646,12 @@ if (!MemberInst) return Member; // If we are looking for something in the same block as the member, it must - // be a leader because this function is looking for operands for a phi node. - if (MemberInst->getParent() == BB || - DT->dominates(MemberInst->getParent(), BB)) { + // occur before OrigInst to be valid. Otherwise, we may pull the wrong + // version over a loop backedge. 
Similarly, we have to ensure it is not in + // a block dominated by OrigInst, or else we can't guarantee that it isn't + // dependent on it. + if (DT->dominates(MemberInst->getParent(), BB)) return Member; - } } return nullptr; } Index: test/Transforms/NewGVN/completeness.ll =================================================================== --- test/Transforms/NewGVN/completeness.ll +++ test/Transforms/NewGVN/completeness.ll @@ -8,9 +8,9 @@ ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; CHECK: br label [[TMP6:%.*]] ; CHECK: br label [[TMP6]] -; CHECK: [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ] +; CHECK: [[PHIOFOPS:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ] ; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ] -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: ret i32 [[PHIOFOPS]] ; %3 = icmp ne i32 %0, 0 br i1 %3, label %4, label %5 @@ -59,9 +59,9 @@ ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ] ; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1000, [[ENTRY]] ], [ 10, [[DELAY]] ] -; CHECK-NEXT: ret i32 [[TMP0]] +; CHECK-NEXT: ret i32 [[PHIOFOPS]] ; entry: @@ -83,9 +83,9 @@ ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] ; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY]] ], [ , [[DELAY]] ] -; CHECK-NEXT: ret <2 x i32> [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[PHIOFOPS]] ; entry: @@ -107,9 +107,9 @@ ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] ; CHECK-NEXT: [[A:%.*]] = phi 
<2 x i32> [ , [[ENTRY]] ], [ , [[DELAY]] ] -; CHECK-NEXT: ret <2 x i32> [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[PHIOFOPS]] ; entry: @@ -188,11 +188,11 @@ ; CHECK: bb14: ; CHECK-NEXT: br label [[BB15:%.*]] ; CHECK: bb15: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ] ; CHECK-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP24:%.*]], [[BB15]] ], [ [[TMP11]], [[BB14]] ] ; CHECK-NEXT: [[TMP17:%.*]] = phi i64 [ [[TMP22:%.*]], [[BB15]] ], [ [[TMP10]], [[BB14]] ] ; CHECK-NEXT: [[TMP18:%.*]] = phi i64 [ [[TMP20:%.*]], [[BB15]] ], [ 0, [[BB14]] ] -; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP]], align 8 +; CHECK-NEXT: store i64 [[PHIOFOPS]], i64* [[TMP]], align 8 ; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[TMP18]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 [[TMP20]] ; CHECK-NEXT: [[TMP22]] = load i64, i64* [[TMP21]], align 8 @@ -263,17 +263,17 @@ ; CHECK-NEXT: entry-block: ; CHECK-NEXT: br label %main-loop ; CHECK: main-loop: -; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ] +; CHECK-NEXT: [[PHIOFOPS1:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ] ; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ] ; CHECK-NEXT: store volatile i8 0, i8* [[ADDR:%.*]] -; CHECK-NEXT: br i1 [[TMP0]], label %busy-wait-phi-0, label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[PHIOFOPS1]], label %busy-wait-phi-0, label [[EXIT:%.*]] ; CHECK: busy-wait-phi-0: ; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, i8* [[ADDR]] ; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0 ; CHECK-NEXT: br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]] ; CHECK: core: -; CHECK-NEXT: br i1 [[TMP1]], label 
[[TRAP:%.*]], label %main-loop +; CHECK: trap: ; CHECK-NEXT: ret i8 1 ; CHECK: exit: @@ -357,7 +357,7 @@ ; CHECK: bb2: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ 1, [[BB2]] ], [ [[TMP8:%.*]], [[BB6]] ] ; CHECK-NEXT: [[TMP8]] = add nuw nsw i32 [[TMP7]], 1 ; CHECK-NEXT: [[TMP11]] = add i32 -14, [[TMP8]] @@ -400,7 +400,8 @@ ; CHECK: i: ; CHECK-NEXT: br i1 undef, label [[K:%.*]], label [[G]] ; CHECK: k: -; CHECK-NEXT: br i1 false, label [[C]], label [[O:%.*]] +; CHECK-NEXT: [[L:%.*]] = icmp eq i32* [[N]], inttoptr (i64 32 to i32*) +; CHECK-NEXT: br i1 [[L]], label [[C]], label [[O:%.*]] ; CHECK: o: ; CHECK-NEXT: br label [[C]] ; CHECK: c: Index: test/Transforms/NewGVN/pr33305.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/pr33305.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -newgvn -S %s | FileCheck %s +; Ensure we do not incorrectly do phi of ops +source_filename = "/Users/dannyb/sources/llvm-clean/debug-build/pr33305.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +@a = common global i32 0, align 4 +@b = local_unnamed_addr global i32* @a, align 8 +@e = local_unnamed_addr global i32 -1, align 4 +@g = local_unnamed_addr global i32 1, align 4 +@c = common local_unnamed_addr global i32 0, align 4 +@f = common local_unnamed_addr global i32 0, align 4 +@h = common local_unnamed_addr global i32 0, align 4 +@str = private unnamed_addr constant [5 x i8] c"fine\00" +@str.2 = private unnamed_addr constant [8 x i8] c"Screwed\00" + +; Function Attrs: nounwind optsize ssp uwtable +define i32 @main()
local_unnamed_addr #0 { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTPR_I:%.*]] = load i32, i32* @c, align 4, !tbaa !3 +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[DOTPR_I]], 1 +; CHECK-NEXT: br i1 [[CMP13_I]], label [[FOR_COND1_PREHEADER_LR_PH_I:%.*]], label [[ENTRY_FOR_END9_I_CRIT_EDGE:%.*]] +; CHECK: entry.for.end9.i_crit_edge: +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* @h, align 4, !tbaa !3 +; CHECK-NEXT: br label [[FOR_END9_I:%.*]] +; CHECK: for.cond1.preheader.lr.ph.i: +; CHECK-NEXT: [[G_PROMOTED14_I:%.*]] = load i32, i32* @g, align 4, !tbaa !3 +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_I:%.*]] +; CHECK: for.cond1.preheader.i: +; CHECK-NEXT: [[INC816_I:%.*]] = phi i32 [ [[DOTPR_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ [[INC8_I:%.*]], [[FOR_INC7_I:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[G_PROMOTED14_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ 0, [[FOR_INC7_I]] ] +; CHECK-NEXT: br label [[FOR_BODY3_I:%.*]] +; CHECK: for.body3.i: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[FOR_COND1_PREHEADER_I]] ], [ true, [[LOR_END_I:%.*]] ] +; CHECK-NEXT: [[INC12_I:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_I]] ], [ [[INC_I:%.*]], [[LOR_END_I]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[FOR_COND1_PREHEADER_I]] ], [ 0, [[LOR_END_I]] ] +; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[OR_COND_I:%.*]] = and i1 [[TMP1]], [[TOBOOL_I]] +; CHECK-NEXT: br i1 [[OR_COND_I]], label [[LOR_END_I]], label [[LOR_RHS_I:%.*]] +; CHECK: lor.rhs.i: +; CHECK-NEXT: [[LNOT_I:%.*]] = xor i1 [[TOBOOL_I]], true +; CHECK-NEXT: [[LNOT_EXT_I:%.*]] = zext i1 [[LNOT_I]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[TMP3]], [[LNOT_EXT_I]] +; CHECK-NEXT: store i32 [[XOR_I]], i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: br label [[LOR_END_I]] +; CHECK: lor.end.i: +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[INC12_I]], 1 +; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq 
i32 [[INC_I]], 2 +; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[FOR_INC7_I]], label [[FOR_BODY3_I]] +; CHECK: for.inc7.i: +; CHECK-NEXT: [[INC8_I]] = add nsw i32 [[INC816_I]], 1 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[INC816_I]], 0 +; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_PREHEADER_I]], label [[FOR_COND_FOR_END9_CRIT_EDGE_I:%.*]] +; CHECK: for.cond.for.end9_crit_edge.i: +; CHECK-NEXT: store i32 0, i32* @g, align 4, !tbaa !3 +; CHECK-NEXT: store i32 2, i32* @h, align 4, !tbaa !3 +; CHECK-NEXT: store i32 [[INC8_I]], i32* @c, align 4, !tbaa !3 +; CHECK-NEXT: br label [[FOR_END9_I]] +; CHECK: for.end9.i: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[DOTPRE]], [[ENTRY_FOR_END9_I_CRIT_EDGE]] ], [ 2, [[FOR_COND_FOR_END9_CRIT_EDGE_I]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** @b, align 8, !tbaa !7 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP5]], align 4, !tbaa !3 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: [[CMP10_I:%.*]] = icmp slt i32 [[TMP6]], -1 +; CHECK-NEXT: br i1 [[CMP10_I]], label [[IF_THEN_I:%.*]], label [[FN1_EXIT:%.*]] +; CHECK: if.then.i: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @f, align 4, !tbaa !3 +; CHECK-NEXT: store i32 [[TMP7]], i32* [[TMP5]], align 4, !tbaa !3 +; CHECK-NEXT: br label [[FN1_EXIT]] +; CHECK: fn1.exit: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* @a, align 4, !tbaa !3 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[PUTS2:%.*]] = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str.2, i64 0, i64 0)) +; CHECK-NEXT: tail call void @abort() #4 +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: [[PUTS:%.*]] = tail call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str, i64 0, i64 0)) +; CHECK-NEXT: ret i32 0 +; +entry: + %.pr.i = load i32, i32* @c, align 4, !tbaa !3 + %cmp13.i = icmp slt i32 %.pr.i, 1 + br i1 %cmp13.i, label 
%for.cond1.preheader.lr.ph.i, label %entry.for.end9.i_crit_edge + +entry.for.end9.i_crit_edge: ; preds = %entry + %.pre = load i32, i32* @h, align 4, !tbaa !3 + br label %for.end9.i + +for.cond1.preheader.lr.ph.i: ; preds = %entry + %g.promoted14.i = load i32, i32* @g, align 4, !tbaa !3 + br label %for.cond1.preheader.i + +for.cond1.preheader.i: ; preds = %for.inc7.i, %for.cond1.preheader.lr.ph.i + %inc816.i = phi i32 [ %.pr.i, %for.cond1.preheader.lr.ph.i ], [ %inc8.i, %for.inc7.i ] + %0 = phi i32 [ %g.promoted14.i, %for.cond1.preheader.lr.ph.i ], [ 0, %for.inc7.i ] + br label %for.body3.i + +for.body3.i: ; preds = %lor.end.i, %for.cond1.preheader.i + %1 = phi i1 [ false, %for.cond1.preheader.i ], [ true, %lor.end.i ] + %inc12.i = phi i32 [ 0, %for.cond1.preheader.i ], [ %inc.i, %lor.end.i ] + %2 = phi i32 [ %0, %for.cond1.preheader.i ], [ 0, %lor.end.i ] + %tobool.i = icmp ne i32 %2, 0 + %or.cond.i = and i1 %1, %tobool.i + br i1 %or.cond.i, label %lor.end.i, label %lor.rhs.i + +lor.rhs.i: ; preds = %for.body3.i + %lnot.i = xor i1 %tobool.i, true + %lnot.ext.i = zext i1 %lnot.i to i32 + %3 = load i32, i32* @e, align 4, !tbaa !3 + %xor.i = xor i32 %3, %lnot.ext.i + store i32 %xor.i, i32* @e, align 4, !tbaa !3 + br label %lor.end.i + +lor.end.i: ; preds = %lor.rhs.i, %for.body3.i + %inc.i = add nuw nsw i32 %inc12.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 2 + br i1 %exitcond.i, label %for.inc7.i, label %for.body3.i + +for.inc7.i: ; preds = %lor.end.i + %inc8.i = add nsw i32 %inc816.i, 1 + %cmp.i = icmp slt i32 %inc816.i, 0 + br i1 %cmp.i, label %for.cond1.preheader.i, label %for.cond.for.end9_crit_edge.i + +for.cond.for.end9_crit_edge.i: ; preds = %for.inc7.i + store i32 0, i32* @g, align 4, !tbaa !3 + store i32 2, i32* @h, align 4, !tbaa !3 + store i32 %inc8.i, i32* @c, align 4, !tbaa !3 + br label %for.end9.i + +for.end9.i: ; preds = %entry.for.end9.i_crit_edge, %for.cond.for.end9_crit_edge.i + %4 = phi i32 [ %.pre, %entry.for.end9.i_crit_edge ], [ 2, 
%for.cond.for.end9_crit_edge.i ] + %5 = load i32*, i32** @b, align 8, !tbaa !7 + store i32 %4, i32* %5, align 4, !tbaa !3 + %6 = load i32, i32* @e, align 4, !tbaa !3 + %cmp10.i = icmp slt i32 %6, -1 + br i1 %cmp10.i, label %if.then.i, label %fn1.exit + +if.then.i: ; preds = %for.end9.i + %7 = load i32, i32* @f, align 4, !tbaa !3 + store i32 %7, i32* %5, align 4, !tbaa !3 + br label %fn1.exit + +fn1.exit: ; preds = %if.then.i, %for.end9.i + %8 = load i32, i32* @a, align 4, !tbaa !3 + %tobool = icmp eq i32 %8, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %fn1.exit + %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str.2, i64 0, i64 0)) + tail call void @abort() #3 + unreachable + +if.end: ; preds = %fn1.exit + %puts = tail call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str, i64 0, i64 0)) + ret i32 0 +} + +; Function Attrs: noreturn nounwind optsize +declare void @abort() local_unnamed_addr #1 + +; Function Attrs: nounwind +declare i32 @puts(i8* nocapture readonly) local_unnamed_addr #2 + +attributes #0 = { nounwind optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" 
"target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } +attributes #3 = { noreturn nounwind optsize } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git e97b4dda83fd49e0218ea06ba4e37796a81b2027) (/Users/dannyb/sources/llvm-clean b38f051979e4ac2aa6513e40046d120fd472cb96)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"any pointer", !5, i64 0} Index: test/Transforms/NewGVN/pr33461.ll =================================================================== --- test/Transforms/NewGVN/pr33461.ll +++ test/Transforms/NewGVN/pr33461.ll @@ -8,12 +8,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[FOR_COND1:%.*]], label [[FOR_INC:%.*]] ; CHECK: for.cond1: -; CHECK-NEXT: [[TMP0:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_INC]] ], [ undef, [[ENTRY:%.*]] ] -; CHECK-NEXT: store i16 [[TMP0]], i16* @b, align 2 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* @b, align 2 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* @b, align 2 -; CHECK-NEXT: [[INC]] = add i16 [[TMP1]], 1 +; CHECK-NEXT: [[INC:%.*]] = add i16 [[TMP1]], 1 ; CHECK-NEXT: store i16 [[INC]], i16* @b, align 2 ; CHECK-NEXT: br label [[FOR_COND1]] ;