Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -128,7 +128,7 @@ cl::init(false), cl::Hidden); /// Currently, the generation "phi of ops" can result in correctness issues. -static cl::opt EnablePhiOfOps("enable-phi-of-ops", cl::init(false), +static cl::opt EnablePhiOfOps("enable-phi-of-ops", cl::init(true), cl::Hidden); //===----------------------------------------------------------------------===// @@ -475,6 +475,7 @@ // These mappings just store various data that would normally be part of the // IR. DenseSet PHINodeUses; + DenseMap OpSafeForPHIOfOps; // Map a temporary instruction we created to a parent block. DenseMap TempToBlock; // Map between the already in-program instructions and the temporary phis we @@ -595,7 +596,7 @@ private: // Expression handling. - const Expression *createExpression(Instruction *) const; + const Expression *createExpression(Instruction *, bool) const; const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *) const; PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, @@ -643,6 +644,14 @@ void initializeCongruenceClasses(Function &F); const Expression *makePossiblePhiOfOps(Instruction *, SmallPtrSetImpl &); + Value *findLeaderForInst(Instruction *ValueOp, + SmallPtrSetImpl &Visited, + bool SafeToSimplify, MemoryAccess *MemAccess, + Instruction *OrigInst, BasicBlock *PredBB); + + bool OpIsSafeForPHIOfOps(Value *Op, Instruction *OrigInst, + const BasicBlock *PHIBlock, + SmallPtrSetImpl &); void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); void removePhiOfOps(Instruction *I, PHINode *PHITemp); @@ -703,8 +712,7 @@ void replaceInstruction(Instruction *, Value *); void markInstructionForDeletion(Instruction *); void deleteInstructionsInBlock(BasicBlock *); - Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const; - + Value *findPHIOfOpsLeader(const Expression *E, const BasicBlock *BB) const; // New instruction creation. void handleNewInstruction(Instruction *){}; @@ -982,7 +990,12 @@ return nullptr; } -const Expression *NewGVN::createExpression(Instruction *I) const { +// Create a value expression from the instruction I, replacing operands with +// their leadesr. If SafeToSimplify is true, we use the instruction simplifier +// to try to simplify the resulting expression. It is not always safe to +// simplify, see makePossiblePhiOfOps for why. +const Expression *NewGVN::createExpression(Instruction *I, + bool SafeToSimplify) const { auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); bool AllConstant = setBasicExpressionInfo(I, E); @@ -996,7 +1009,8 @@ if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))) E->swapOperands(0, 1); } - + if (!SafeToSimplify) + return E; // Perform simplification. // TODO: Right now we only check to see if we get a constant result. // We may get a less than constant, but still better, result for @@ -1390,8 +1404,8 @@ } } - const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, - LI, DefiningAccess); + const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI, + DefiningAccess); // If our MemoryLeader is not our defining access, add a use to the // MemoryLeader, so that we get reprocessed when it changes. if (LE->getMemoryLeader() != DefiningAccess) @@ -1781,10 +1795,15 @@ if (PI == LastPredInfo) continue; LastPredInfo = PI; - - // TODO: Along the false edge, we may know more things too, like icmp of + // In phi of ops cases, we may have predicate info that we are evaluating + // in a different context. + if (!DT->dominates(PBranch->To, getBlockForValue(I))) + continue; + // TODO: Along the false edge, we may know more things too, like + // icmp of // same operands is false. - // TODO: We only handle actual comparison conditions below, not and/or. + // TODO: We only handle actual comparison conditions below, not + // and/or. auto *BranchCond = dyn_cast(PBranch->Condition); if (!BranchCond) continue; @@ -1833,7 +1852,7 @@ } } // Create expression will take care of simplifyCmpInst - return createExpression(I); + return createExpression(I, true); } // Return true if V is a value that will always be available (IE can @@ -1875,7 +1894,7 @@ E = performSymbolicLoadEvaluation(I); break; case Instruction::BitCast: { - E = createExpression(I); + E = createExpression(I, true); } break; case Instruction::ICmp: case Instruction::FCmp: { @@ -1915,7 +1934,7 @@ case Instruction::InsertElement: case Instruction::ShuffleVector: case Instruction::GetElementPtr: - E = createExpression(I); + E = createExpression(I, true); break; default: return nullptr; @@ -2355,7 +2374,7 @@ Value *CondEvaluated = findConditionEquivalence(Cond); if (!CondEvaluated) { if (auto *I = dyn_cast(Cond)) { - const Expression *E = createExpression(I); + const Expression *E = createExpression(I, true); if (const auto *CE = dyn_cast(E)) { CondEvaluated = CE->getConstantValue(); } @@ -2458,6 +2477,93 @@ isa(I); } +// Return true if this operand will be safe to pass to the simplifier if used as +// part of a phi of ops instruction. +// +// The reason some operands are unsafe is that we are not trying to recursively +// translate everything back through phi nodes. We actually expect some lookups +// of expressions to fail. However, the simplifier itself sometimes tries to +// look through phi nodes as it simplifies. If we have translated one operand +// through a phi node, and another not, the simplifier will now not realize it +// is looking at things from two different loop iterations, and get wrong +// answers. An example of this is pr33185. +bool NewGVN::OpIsSafeForPHIOfOps(Value *V, Instruction *OrigInst, + const BasicBlock *PHIBlock, + SmallPtrSetImpl &Visited) { + if (!isa(V)) + return true; + auto OISIt = OpSafeForPHIOfOps.find(V); + if (OISIt != OpSafeForPHIOfOps.end()) + return OISIt->second; + // Keep walking until we either dominate the phi block, or hit a phi, or run + // out of things to check. Note the order of checks is important. PHIs that + // strictly dominate our phi block are okay. FIXME: I believe this is too + // restrictive, and the proper check for phi nodes should be whether they + // are in the same block as our current phi node. + if (DT->properlyDominates(getBlockForValue(V), PHIBlock)) { + OpSafeForPHIOfOps.insert({V, true}); + return true; + } + // PHI in the same block. + if (isa(V) && getBlockForValue(V) == PHIBlock) { + OpSafeForPHIOfOps.insert({V, false}); + return false; + } + for (auto Op : cast(V)->operand_values()) { + if (!isa(Op)) + continue; + // See if we already know the answer for this node. + auto OISIt = OpSafeForPHIOfOps.find(Op); + if (OISIt != OpSafeForPHIOfOps.end()) { + if (!OISIt->second) { + OpSafeForPHIOfOps.insert({V, false}); + return false; + } + } + if (!Visited.insert(Op).second) + continue; + if (!OpIsSafeForPHIOfOps(Op, OrigInst, PHIBlock, Visited)) { + OpSafeForPHIOfOps.insert({V, false}); + return false; + } + } + OpSafeForPHIOfOps.insert({V, true}); + return true; +} + +Value *NewGVN::findLeaderForInst(Instruction *ValueOp, + SmallPtrSetImpl &Visited, + bool SafeToSimplify, MemoryAccess *MemAccess, + Instruction *OrigInst, BasicBlock *PredBB) { + unsigned IDFSNum = InstrToDFSNum(OrigInst); + // Make sure it's marked as a temporary instruction. + AllTempInstructions.insert(ValueOp); + // and make sure anything that tries to add it's DFS number is + // redirected to the instruction we are making a phi of ops + // for. + TempToBlock.insert({ValueOp, PredBB}); + InstrDFS.insert({ValueOp, IDFSNum}); + + const Expression *E = SafeToSimplify + ? performSymbolicEvaluation(ValueOp, Visited) + : createExpression(ValueOp, false); + InstrDFS.erase(ValueOp); + AllTempInstructions.erase(ValueOp); + TempToBlock.erase(ValueOp); + if (MemAccess) + TempToMemory.erase(ValueOp); + if (!E) + return nullptr; + auto *FoundVal = findPHIOfOpsLeader(E, PredBB); + if (!FoundVal || FoundVal == OrigInst) { + ExpressionToPhiOfOps[E].insert(OrigInst); + return nullptr; + } + if (auto *SI = dyn_cast(FoundVal)) + FoundVal = SI->getValueOperand(); + return FoundVal; +} + // When we see an instruction that is an op of phis, generate the equivalent phi // of ops form. const Expression * @@ -2475,7 +2581,6 @@ if (!isCycleFree(I)) return nullptr; - unsigned IDFSNum = InstrToDFSNum(I); SmallPtrSet ProcessedPHIs; // TODO: We don't do phi translation on memory accesses because it's // complicated. For a load, we'd need to be able to simulate a new memoryuse, @@ -2488,18 +2593,9 @@ MemAccess->getDefiningAccess()->getBlock() == I->getParent()) return nullptr; + SmallPtrSet VisitedOps; // Convert op of phis to phi of ops for (auto &Op : I->operands()) { - // TODO: We can't handle expressions that must be recursively translated - // IE - // a = phi (b, c) - // f = use a - // g = f + phi of something - // To properly make a phi of ops for g, we'd have to properly translate and - // use the instruction for f. We should add this by splitting out the - // instruction creation we do below. - if (isa(Op) && PHINodeUses.count(cast(Op))) - return nullptr; if (!isa(Op)) continue; auto *OpPHI = cast(Op); @@ -2520,34 +2616,31 @@ Instruction *ValueOp = I->clone(); if (MemAccess) TempToMemory.insert({ValueOp, MemAccess}); - + bool SafeForPHIOfOps = true; + VisitedOps.clear(); for (auto &Op : ValueOp->operands()) { + auto *OrigOp = &*Op; Op = Op->DoPHITranslation(PHIBlock, PredBB); // When this operand changes, it could change whether there is a // leader for us or not. addAdditionalUsers(Op, I); + // If we phi-translated the op, it must be safe. + SafeForPHIOfOps = SafeForPHIOfOps && + (Op != OrigOp || + OpIsSafeForPHIOfOps(Op, I, PHIBlock, VisitedOps)); } - // Make sure it's marked as a temporary instruction. - AllTempInstructions.insert(ValueOp); - // and make sure anything that tries to add it's DFS number is - // redirected to the instruction we are making a phi of ops - // for. - InstrDFS.insert({ValueOp, IDFSNum}); - const Expression *E = performSymbolicEvaluation(ValueOp, Visited); - InstrDFS.erase(ValueOp); - AllTempInstructions.erase(ValueOp); + // FIXME: For those things that are not safe We could generate + // expressions all the way down, and see if this comes out to a + // constant. For anything where that is true, and unsafe, we should + // have made a phi-of-ops (or value numbered it equivalent to something) + // for the pieces already. + FoundVal = !SafeForPHIOfOps + ? nullptr + : findLeaderForInst(ValueOp, Visited, SafeForPHIOfOps, + MemAccess, I, PredBB); ValueOp->deleteValue(); - if (MemAccess) - TempToMemory.erase(ValueOp); - if (!E) - return nullptr; - FoundVal = findPhiOfOpsLeader(E, PredBB); - if (!FoundVal) { - ExpressionToPhiOfOps[E].insert(I); + if (!FoundVal) return nullptr; - } - if (auto *SI = dyn_cast(FoundVal)) - FoundVal = SI->getValueOperand(); } else { DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " << getBlockName(PredBB) @@ -2562,7 +2655,8 @@ auto *ValuePHI = RealToTemp.lookup(I); bool NewPHI = false; if (!ValuePHI) { - ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands()); + ValuePHI = + PHINode::Create(I->getType(), OpPHI->getNumOperands(), "phiofops"); addPhiOfOps(ValuePHI, PHIBlock, I); NewPHI = true; NumGVNPHIOfOpsCreated++; @@ -2687,6 +2781,8 @@ TempToBlock.clear(); TempToMemory.clear(); PHIOfOpsPHIs.clear(); + PHINodeUses.clear(); + OpSafeForPHIOfOps.clear(); ReachableBlocks.clear(); ReachableEdges.clear(); #ifndef NDEBUG @@ -3517,8 +3613,9 @@ } // Given a value and a basic block we are trying to see if it is available in, -// see if the value has a leader available in that block. -Value *NewGVN::findPhiOfOpsLeader(const Expression *E, +// see if the value has a leader available in that block, and that will dominate +// OrigInst. +Value *NewGVN::findPHIOfOpsLeader(const Expression *E, const BasicBlock *BB) const { // It would already be constant if we could make it constant if (auto *CE = dyn_cast(E)) @@ -3538,11 +3635,12 @@ if (!MemberInst) return Member; // If we are looking for something in the same block as the member, it must - // be a leader because this function is looking for operands for a phi node. - if (MemberInst->getParent() == BB || - DT->dominates(MemberInst->getParent(), BB)) { + // occur before OrigInst to be valid. Otherwise, we may pull the wrong + // version over a loop backedge. Similarly, we have to ensure it is not in + // a block dominated by OrigInst, or else we can't guarantee that it isn't + // dependent on it. + if (DT->dominates(MemberInst->getParent(), BB)) return Member; - } } return nullptr; } Index: test/Transforms/NewGVN/completeness.ll =================================================================== --- test/Transforms/NewGVN/completeness.ll +++ test/Transforms/NewGVN/completeness.ll @@ -8,9 +8,9 @@ ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; CHECK: br label [[TMP6:%.*]] ; CHECK: br label [[TMP6]] -; CHECK: [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ] +; CHECK: [[PHIOFOPS:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ] ; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ] -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: ret i32 [[PHIOFOPS]] ; %3 = icmp ne i32 %0, 0 br i1 %3, label %4, label %5 @@ -59,9 +59,9 @@ ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ] ; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1000, [[ENTRY]] ], [ 10, [[DELAY]] ] -; CHECK-NEXT: ret i32 [[TMP0]] +; CHECK-NEXT: ret i32 [[PHIOFOPS]] ; entry: @@ -83,9 +83,9 @@ ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] ; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY]] ], [ , [[DELAY]] ] -; CHECK-NEXT: ret <2 x i32> [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[PHIOFOPS]] ; entry: @@ -107,9 +107,9 @@ ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] ; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY]] ], [ , [[DELAY]] ] -; CHECK-NEXT: ret <2 x i32> [[TMP0]] +; CHECK-NEXT: ret <2 x i32> [[PHIOFOPS]] ; entry: @@ -188,11 +188,11 @@ ; CHECK: bb14: ; CHECK-NEXT: br label [[BB15:%.*]] ; CHECK: bb15: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ] ; CHECK-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP24:%.*]], [[BB15]] ], [ [[TMP11]], [[BB14]] ] ; CHECK-NEXT: [[TMP17:%.*]] = phi i64 [ [[TMP22:%.*]], [[BB15]] ], [ [[TMP10]], [[BB14]] ] ; CHECK-NEXT: [[TMP18:%.*]] = phi i64 [ [[TMP20:%.*]], [[BB15]] ], [ 0, [[BB14]] ] -; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP]], align 8 +; CHECK-NEXT: store i64 [[PHIOFOPS]], i64* [[TMP]], align 8 ; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[TMP18]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 [[TMP20]] ; CHECK-NEXT: [[TMP22]] = load i64, i64* [[TMP21]], align 8 @@ -263,17 +263,17 @@ ; CHECK-NEXT: entry-block: ; CHECK-NEXT: br label %main-loop ; CHECK: main-loop: -; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ] +; CHECK-NEXT: [[PHIOFOPS1:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ] ; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ] ; CHECK-NEXT: store volatile i8 0, i8* [[ADDR:%.*]] -; CHECK-NEXT: br i1 [[TMP0]], label %busy-wait-phi-0, label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[PHIOFOPS1]], label %busy-wait-phi-0, label [[EXIT:%.*]] ; CHECK: busy-wait-phi-0: ; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, i8* [[ADDR]] ; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0 ; CHECK-NEXT: br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]] ; CHECK: core: -; CHECK-NEXT: br i1 [[TMP1]], label [[TRAP:%.*]], label %main-loop +; CHECK-NEXT: br i1 [[PHIOFOPS]], label [[TRAP:%.*]], label %main-loop ; CHECK: trap: ; CHECK-NEXT: ret i8 1 ; CHECK: exit: @@ -357,7 +357,7 @@ ; CHECK: bb2: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ 1, [[BB2]] ], [ [[TMP8:%.*]], [[BB6]] ] ; CHECK-NEXT: [[TMP8]] = add nuw nsw i32 [[TMP7]], 1 ; CHECK-NEXT: [[TMP11]] = add i32 -14, [[TMP8]] Index: test/Transforms/NewGVN/pr33185.ll =================================================================== --- test/Transforms/NewGVN/pr33185.ll +++ test/Transforms/NewGVN/pr33185.ll @@ -4,8 +4,8 @@ @a = local_unnamed_addr global i32 9, align 4 @.str4 = private unnamed_addr constant [6 x i8] c"D:%d\0A\00", align 1 -define i32 @main() local_unnamed_addr { -; CHECK-LABEL: @main( +define i32 @test1() local_unnamed_addr { +; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[CMP1_I:%.*]] = icmp ne i32 [[TMP]], 0 @@ -57,3 +57,63 @@ declare i32 @printf(i8* nocapture readonly, ...) +;; Variant of the above where we have made the udiv available in each predecessor with the wrong values. +;; In the entry block, it is always 0, so we don't try to create a leader there, only in %cond.end.i. +;; We should not create a phi of ops for it using these leaders. +;; A correct phi of ops for this udiv would be phi(0, 1), which we are not smart enough to figure out. +;; If we reuse the incorrect leaders, we will get phi(0, 0). +define i32 @test2() local_unnamed_addr { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[CMP1_I:%.*]] = icmp ne i32 [[TMP]], 0 +; CHECK-NEXT: br label [[FOR_BODY_I:%.*]] +; CHECK: for.body.i: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[COND_END_I:%.*]] ] +; CHECK-NEXT: [[F_08_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[COND_END_I]] ] +; CHECK-NEXT: [[MUL_I:%.*]] = select i1 [[CMP1_I]], i32 [[F_08_I]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[COND_END_I]], label [[COND_TRUE_I:%.*]] +; CHECK: cond.true.i: +; CHECK-NEXT: [[DIV_I:%.*]] = udiv i32 [[MUL_I]], [[F_08_I]] +; CHECK-NEXT: br label [[COND_END_I]] +; CHECK: cond.end.i: +; CHECK-NEXT: [[COND_I:%.*]] = phi i32 [ [[DIV_I]], [[COND_TRUE_I]] ], [ 0, [[FOR_BODY_I]] ] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[F_08_I]], 1 +; CHECK-NEXT: [[CALL5:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0), i32 0) +; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i32 [[INC_I]], 4 +; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[FN1_EXIT:%.*]], label [[FOR_BODY_I]] +; CHECK: fn1.exit: +; CHECK-NEXT: [[CALL4:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0), i32 [[COND_I]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %tmp = load i32, i32* @a, align 4 + %cmp1.i = icmp ne i32 %tmp, 0 + br label %for.body.i + +for.body.i: + %tmp1 = phi i1 [ true, %entry ], [ false, %cond.end.i ] + %f.08.i = phi i32 [ 0, %entry ], [ %inc.i, %cond.end.i ] + %mul.i = select i1 %cmp1.i, i32 %f.08.i, i32 0 + br i1 %tmp1, label %cond.end.i, label %cond.true.i + +cond.true.i: + ;; Ensure we don't replace this divide with a phi of ops that merges the wrong loop iteration value + %div.i = udiv i32 %mul.i, %f.08.i + br label %cond.end.i + +cond.end.i: + %cond.i = phi i32 [ %div.i, %cond.true.i ], [ 0, %for.body.i ] + %inc.i = add nuw nsw i32 %f.08.i, 1 + %test = udiv i32 %mul.i, %inc.i + %call5= tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0), i32 %test) + %exitcond.i = icmp eq i32 %inc.i, 4 + br i1 %exitcond.i, label %fn1.exit, label %for.body.i + +fn1.exit: + %cond.i.lcssa = phi i32 [ %cond.i, %cond.end.i ] + %call4= tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0), i32 %cond.i.lcssa) + ret i32 0 +} + + Index: test/Transforms/NewGVN/pr33305.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/pr33305.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -newgvn -S %s | FileCheck %s +; Ensure we do not incorrect do phi of ops +source_filename = "/Users/dannyb/sources/llvm-clean/debug-build/pr33305.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +@a = common global i32 0, align 4 +@b = local_unnamed_addr global i32* @a, align 8 +@e = local_unnamed_addr global i32 -1, align 4 +@g = local_unnamed_addr global i32 1, align 4 +@c = common local_unnamed_addr global i32 0, align 4 +@f = common local_unnamed_addr global i32 0, align 4 +@h = common local_unnamed_addr global i32 0, align 4 +@str = private unnamed_addr constant [5 x i8] c"fine\00" +@str.2 = private unnamed_addr constant [8 x i8] c"Screwed\00" + +; Function Attrs: nounwind optsize ssp uwtable +define i32 @main() local_unnamed_addr #0 { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTPR_I:%.*]] = load i32, i32* @c, align 4, !tbaa !3 +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[DOTPR_I]], 1 +; CHECK-NEXT: br i1 [[CMP13_I]], label [[FOR_COND1_PREHEADER_LR_PH_I:%.*]], label [[ENTRY_FOR_END9_I_CRIT_EDGE:%.*]] +; CHECK: entry.for.end9.i_crit_edge: +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* @h, align 4, !tbaa !3 +; CHECK-NEXT: br label [[FOR_END9_I:%.*]] +; CHECK: for.cond1.preheader.lr.ph.i: +; CHECK-NEXT: [[G_PROMOTED14_I:%.*]] = load i32, i32* @g, align 4, !tbaa !3 +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_I:%.*]] +; CHECK: for.cond1.preheader.i: +; CHECK-NEXT: [[INC816_I:%.*]] = phi i32 [ [[DOTPR_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ [[INC8_I:%.*]], [[FOR_INC7_I:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[G_PROMOTED14_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ 0, [[FOR_INC7_I]] ] +; CHECK-NEXT: br label [[FOR_BODY3_I:%.*]] +; CHECK: for.body3.i: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[FOR_COND1_PREHEADER_I]] ], [ true, [[LOR_END_I:%.*]] ] +; CHECK-NEXT: [[INC12_I:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_I]] ], [ [[INC_I:%.*]], [[LOR_END_I]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[FOR_COND1_PREHEADER_I]] ], [ 0, [[LOR_END_I]] ] +; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[OR_COND_I:%.*]] = and i1 [[TMP1]], [[TOBOOL_I]] +; CHECK-NEXT: br i1 [[OR_COND_I]], label [[LOR_END_I]], label [[LOR_RHS_I:%.*]] +; CHECK: lor.rhs.i: +; CHECK-NEXT: [[LNOT_I:%.*]] = xor i1 [[TOBOOL_I]], true +; CHECK-NEXT: [[LNOT_EXT_I:%.*]] = zext i1 [[LNOT_I]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[TMP3]], [[LNOT_EXT_I]] +; CHECK-NEXT: store i32 [[XOR_I]], i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: br label [[LOR_END_I]] +; CHECK: lor.end.i: +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[INC12_I]], 1 +; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i32 [[INC_I]], 2 +; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[FOR_INC7_I]], label [[FOR_BODY3_I]] +; CHECK: for.inc7.i: +; CHECK-NEXT: [[INC8_I]] = add nsw i32 [[INC816_I]], 1 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[INC816_I]], 0 +; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_PREHEADER_I]], label [[FOR_COND_FOR_END9_CRIT_EDGE_I:%.*]] +; CHECK: for.cond.for.end9_crit_edge.i: +; CHECK-NEXT: store i32 0, i32* @g, align 4, !tbaa !3 +; CHECK-NEXT: store i32 2, i32* @h, align 4, !tbaa !3 +; CHECK-NEXT: store i32 [[INC8_I]], i32* @c, align 4, !tbaa !3 +; CHECK-NEXT: br label [[FOR_END9_I]] +; CHECK: for.end9.i: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[DOTPRE]], [[ENTRY_FOR_END9_I_CRIT_EDGE]] ], [ 2, [[FOR_COND_FOR_END9_CRIT_EDGE_I]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** @b, align 8, !tbaa !7 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP5]], align 4, !tbaa !3 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: [[CMP10_I:%.*]] = icmp slt i32 [[TMP6]], -1 +; CHECK-NEXT: br i1 [[CMP10_I]], label [[IF_THEN_I:%.*]], label [[FN1_EXIT:%.*]] +; CHECK: if.then.i: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @f, align 4, !tbaa !3 +; CHECK-NEXT: store i32 [[TMP7]], i32* [[TMP5]], align 4, !tbaa !3 +; CHECK-NEXT: br label [[FN1_EXIT]] +; CHECK: fn1.exit: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* @a, align 4, !tbaa !3 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[PUTS2:%.*]] = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str.2, i64 0, i64 0)) +; CHECK-NEXT: tail call void @abort() #4 +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: [[PUTS:%.*]] = tail call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str, i64 0, i64 0)) +; CHECK-NEXT: ret i32 0 +; +entry: + %.pr.i = load i32, i32* @c, align 4, !tbaa !3 + %cmp13.i = icmp slt i32 %.pr.i, 1 + br i1 %cmp13.i, label %for.cond1.preheader.lr.ph.i, label %entry.for.end9.i_crit_edge + +entry.for.end9.i_crit_edge: ; preds = %entry + %.pre = load i32, i32* @h, align 4, !tbaa !3 + br label %for.end9.i + +for.cond1.preheader.lr.ph.i: ; preds = %entry + %g.promoted14.i = load i32, i32* @g, align 4, !tbaa !3 + br label %for.cond1.preheader.i + +for.cond1.preheader.i: ; preds = %for.inc7.i, %for.cond1.preheader.lr.ph.i + %inc816.i = phi i32 [ %.pr.i, %for.cond1.preheader.lr.ph.i ], [ %inc8.i, %for.inc7.i ] + %0 = phi i32 [ %g.promoted14.i, %for.cond1.preheader.lr.ph.i ], [ 0, %for.inc7.i ] + br label %for.body3.i + +for.body3.i: ; preds = %lor.end.i, %for.cond1.preheader.i + %1 = phi i1 [ false, %for.cond1.preheader.i ], [ true, %lor.end.i ] + %inc12.i = phi i32 [ 0, %for.cond1.preheader.i ], [ %inc.i, %lor.end.i ] + %2 = phi i32 [ %0, %for.cond1.preheader.i ], [ 0, %lor.end.i ] + %tobool.i = icmp ne i32 %2, 0 + %or.cond.i = and i1 %1, %tobool.i + br i1 %or.cond.i, label %lor.end.i, label %lor.rhs.i + +lor.rhs.i: ; preds = %for.body3.i + %lnot.i = xor i1 %tobool.i, true + %lnot.ext.i = zext i1 %lnot.i to i32 + %3 = load i32, i32* @e, align 4, !tbaa !3 + %xor.i = xor i32 %3, %lnot.ext.i + store i32 %xor.i, i32* @e, align 4, !tbaa !3 + br label %lor.end.i + +lor.end.i: ; preds = %lor.rhs.i, %for.body3.i + %inc.i = add nuw nsw i32 %inc12.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 2 + br i1 %exitcond.i, label %for.inc7.i, label %for.body3.i + +for.inc7.i: ; preds = %lor.end.i + %inc8.i = add nsw i32 %inc816.i, 1 + %cmp.i = icmp slt i32 %inc816.i, 0 + br i1 %cmp.i, label %for.cond1.preheader.i, label %for.cond.for.end9_crit_edge.i + +for.cond.for.end9_crit_edge.i: ; preds = %for.inc7.i + store i32 0, i32* @g, align 4, !tbaa !3 + store i32 2, i32* @h, align 4, !tbaa !3 + store i32 %inc8.i, i32* @c, align 4, !tbaa !3 + br label %for.end9.i + +for.end9.i: ; preds = %entry.for.end9.i_crit_edge, %for.cond.for.end9_crit_edge.i + %4 = phi i32 [ %.pre, %entry.for.end9.i_crit_edge ], [ 2, %for.cond.for.end9_crit_edge.i ] + %5 = load i32*, i32** @b, align 8, !tbaa !7 + store i32 %4, i32* %5, align 4, !tbaa !3 + %6 = load i32, i32* @e, align 4, !tbaa !3 + %cmp10.i = icmp slt i32 %6, -1 + br i1 %cmp10.i, label %if.then.i, label %fn1.exit + +if.then.i: ; preds = %for.end9.i + %7 = load i32, i32* @f, align 4, !tbaa !3 + store i32 %7, i32* %5, align 4, !tbaa !3 + br label %fn1.exit + +fn1.exit: ; preds = %if.then.i, %for.end9.i + %8 = load i32, i32* @a, align 4, !tbaa !3 + %tobool = icmp eq i32 %8, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %fn1.exit + %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str.2, i64 0, i64 0)) + tail call void @abort() #3 + unreachable + +if.end: ; preds = %fn1.exit + %puts = tail call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str, i64 0, i64 0)) + ret i32 0 +} + +; Function Attrs: noreturn nounwind optsize +declare void @abort() local_unnamed_addr #1 + +; Function Attrs: nounwind +declare i32 @puts(i8* nocapture readonly) local_unnamed_addr #2 + +attributes #0 = { nounwind optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } +attributes #3 = { noreturn nounwind optsize } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git e97b4dda83fd49e0218ea06ba4e37796a81b2027) (/Users/dannyb/sources/llvm-clean b38f051979e4ac2aa6513e40046d120fd472cb96)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"any pointer", !5, i64 0} Index: test/Transforms/NewGVN/pr33432.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/pr33432.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -newgvn -S %s | FileCheck %s +; Ensure we do not incorrect do phi of ops +@d = external local_unnamed_addr global i32, align 4 + +define void @patatino() { +; CHECK-LABEL: @patatino( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4 +; CHECK-NEXT: br label [[FOR_END10:%.*]] +; CHECK: for.end10: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP0]], 8 +; CHECK-NEXT: br i1 undef, label [[IF_END:%.*]], label [[FOR_END10]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @d, align 4 + br label %for.end10 + +for.end10: + %f.0 = phi i32 [ undef, %entry ], [ 8, %for.end10 ] + %or = or i32 %0, %f.0 + %mul12 = mul nsw i32 %or, undef + br i1 undef, label %if.end, label %for.end10 + +if.end: + ret void +} + Index: test/Transforms/NewGVN/pr33461.ll =================================================================== --- test/Transforms/NewGVN/pr33461.ll +++ test/Transforms/NewGVN/pr33461.ll @@ -8,12 +8,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[FOR_COND1:%.*]], label [[FOR_INC:%.*]] ; CHECK: for.cond1: -; CHECK-NEXT: [[TMP0:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_INC]] ], [ undef, [[ENTRY:%.*]] ] -; CHECK-NEXT: store i16 [[TMP0]], i16* @b, align 2 +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_INC]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: store i16 [[PHIOFOPS]], i16* @b, align 2 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* @b, align 2 -; CHECK-NEXT: [[INC]] = add i16 [[TMP1]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* @b, align 2 +; CHECK-NEXT: [[INC]] = add i16 [[TMP0]], 1 ; CHECK-NEXT: store i16 [[INC]], i16* @b, align 2 ; CHECK-NEXT: br label [[FOR_COND1]] ; Index: test/Transforms/NewGVN/pr34135.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/pr34135.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -newgvn -enable-phi-of-ops=true -S | FileCheck %s +;; Make sure we don't incorrectly use predicateinfo to simplify phi of ops cases +source_filename = "pr34135.ll" + +define void @snork(i32 %arg) { +; CHECK-LABEL: @snork( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[ARG:%.*]] to i64 +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[TMP3:%.*]], [[BB1]] ] +; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i64 [[TMP3]], [[TMP]] +; CHECK-NEXT: br i1 [[TMP4]], label [[BB1]], label [[BB7:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i64 [[TMP]], 1 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB7]], label [[BB9:%.*]] +; CHECK: bb7: +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb9: +; CHECK-NEXT: unreachable +; +bb: + %tmp = sext i32 %arg to i64 + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp2 = phi i64 [ 0, %bb ], [ %tmp3, %bb1 ] + %tmp3 = add i64 %tmp2, 1 + %tmp4 = icmp slt i64 %tmp3, %tmp + br i1 %tmp4, label %bb1, label %bb7 + +bb5: ; preds = %bb7 + %tmp6 = icmp sgt i64 %tmp8, 1 + br i1 %tmp6, label %bb7, label %bb9 + +bb7: ; preds = %bb5, %bb1 + %tmp8 = phi i64 [ undef, %bb5 ], [ %tmp, %bb1 ] + br label %bb5 + +bb9: ; preds = %bb5 + unreachable +}