diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -70,13 +70,16 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -110,6 +113,7 @@ #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PredicateInfo.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/VNCoercion.h" #include #include @@ -496,11 +500,13 @@ class NewGVN { Function &F; DominatorTree *DT = nullptr; + PostDominatorTree *PDT = nullptr; const TargetLibraryInfo *TLI = nullptr; AliasAnalysis *AA = nullptr; MemorySSA *MSSA = nullptr; MemorySSAWalker *MSSAWalker = nullptr; AssumptionCache *AC = nullptr; + ImplicitControlFlowTracking *ICF = nullptr; const DataLayout &DL; std::unique_ptr PredInfo; @@ -546,6 +552,11 @@ // created that they are known equivalent to. DenseMap RealToTemp; + // We do not want to apply load coercion in new instructions that are + // generated during phi-of-ops optimization. We use this value to bail out in + // these cases. + Value *CurrentPhiOfOpsInsn = nullptr; + // In order to know when we should re-process instructions that have // phi-of-ops, we track the set of expressions that they needed as // leaders. 
When we discover new leaders for those expressions, we process the @@ -669,10 +680,10 @@ SmallVector NewLoadsInLoadCoercion; public: - NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, - const DataLayout &DL) - : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL), + NewGVN(Function &F, DominatorTree *DT, PostDominatorTree *PDT, + AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, + MemorySSA *MSSA, const DataLayout &DL) + : F(F), DT(DT), PDT(PDT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL), PredInfo(std::make_unique(F, *DT, *AC)), SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false, /*CanUseUndef=*/false) {} @@ -935,6 +946,16 @@ // Extract the value that will replace the load from the depending // instruction. Value *getExtractedValue(LoadInst *, Instruction *); + // Emit the phi that replaces the load and it updates the SSA with the new + // phi. + Value *emitLoadCoercionPhi( + LoadInst *, BasicBlock *, + SmallVectorImpl> &); + // Check if the load can be replaced by a phi. + Value *tryReplaceLoadWithPhi( + LoadInst *, BasicBlock *, + SmallVectorImpl> &, + SmallVectorImpl &); }; } // end anonymous namespace @@ -1163,6 +1184,8 @@ CongruenceClass *CC = ValueToClass.lookup(V); if (CC) { + if (EnableLoadCoercion && LoadCoercion.count(CC->getLeader())) + return ExprResult::none(); if (CC->getLeader() && CC->getLeader() != I) { return ExprResult::some(createVariableOrConstant(CC->getLeader()), V); } @@ -1577,7 +1600,97 @@ if (It != NewLoadsInLoadCoercion.end()) return nullptr; - if (auto *DepSI = dyn_cast(DepInst)) { + if (auto *MemPhi = dyn_cast(DefiningAccess)) { + // If the candidate load is dominated by a call that never returns, then we + // do not replace the load with a phi node because this will break the + // semantics of the code. 
+ if (ICF->isDominatedByICFIFromSameBlock(LI)) + return nullptr; + + // The MemoryPhi of Example 1 indicates that the load is dependent on the + // store (1) in Basic block T and store (2) in basic block F. Therefore, + // both of the store instructions should be added in LoadCoercion map. + // + // Example 1: + // BB1: BB2: + // 1 = MemoryDef(liveOnEntry) 2 = MemoryDef(liveOnEntry) + // store i32 100, i32* %P store i32 500, i32* %P + // \ / + // BB3: + // 3 = MemoryPhi({T,1},{F,2}) + // %V = load i32, i32* %P + // + // In Example 2, the load of BB3 has two dependencies: i. the store in BB1 + // as the MemoryPhi indicates and the load in BB2 which is not included in + // MemoryPhi. To find this dependency, we have to find all the uses that are + // live on Entry and check if any of them can optimize out the current load. + // + // Example 2: + // BB1: BB2: + // 1 = MemoryDef(liveOnEntry) 0 = MemoryDef(liveOnEntry) + // store i32 100, i32* %P %V1 = load i32, i32* %P + // \ / + // BB3: + // 2 = MemoryPhi({T,1},{F,liveOnEntry}) + // %V2 = load i32, i32* %P + // + + // Iterate over all the operands of the memory phi and check if any of its + // operands can optimize out the current load. + for (Use &Op : MemPhi->incoming_values()) { + // Bail out if one of the operands is not a memory use or definition. + if (!isa(&Op)) + return nullptr; + + MemoryUseOrDef *MemAccess = cast(&Op); + assert(MemAccess && "Memory definition is exepcted.\n"); + int Offset = -1; + Instruction *DepI = nullptr; + + // If any of the operands of the MemoryPhi is live on entry (Example 2), + // then we have to check if there is a load instruction that can optimize + // out the current load instruction. 
+ if (MSSA->isLiveOnEntryDef(MemAccess)) { + MemoryAccess *MemAccess = MSSA->getLiveOnEntryDef(); + for (const auto &U : MemAccess->uses()) { + Offset = -1; + auto *MemUse = dyn_cast(U.getUser()); + if (MemUse == nullptr) + continue; + DepI = MemUse->getMemoryInst(); + if (!isa(DepI) || DepI == LI || DepI->getNumUses() == 0) + continue; + // Check if DepI is in the current incoming block or it is in a + // predecessor that dominates the incoming block. + BasicBlock *DepIBB = DepI->getParent(); + BasicBlock *IncomingBB = MemPhi->getIncomingBlock(Op); + if (IncomingBB == DepIBB || DT->dominates(DepIBB, IncomingBB)) + Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, + cast(DepI), DL); + if (Offset >= 0) + tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepI); + } + } else { + // Check if the MemoryPhi operand can optimize out the current load. + DepI = MemAccess->getMemoryInst(); + if (DT->dominates(LI, DepI)) { + // In this case, there is a loop. For now, we bail-out load coercion. + const_cast(this)->LoadCoercion.erase(LI); + return nullptr; + } else if (StoreInst *S = dyn_cast(DepI)) + Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, S, DL); + else if (LoadInst *L = dyn_cast(DepI)) { + Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, L, DL); + } else { + const_cast(this)->LoadCoercion.erase(LI); + return nullptr; + } + if (Offset >= 0) + tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepI); + } + } + return nullptr; + } else if (auto *DepSI = dyn_cast(DepInst)) { // Can't forward from non-atomic to atomic without violating memory model. // Also don't need to coerce if they are the same type, we will just // propagate. @@ -1618,8 +1731,8 @@ return createConstantExpression(PossibleConstant); } } else if (EnableLoadCoercion) { - // Similarly, we do not create a load expression for the loads that are - // elimianted with load coercion. + // Similarly, we do not create a load expression for the loads that + // are eliminated with load coercion. 
tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepInst); return nullptr; } @@ -1689,8 +1802,18 @@ MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(OriginalAccess); + // If the load is generated during phi-of-ops optimization, then we do not + // apply load coercion. + if (LI == CurrentPhiOfOpsInsn) + return createLoadExpAndUpdateMemUses(LI, LoadAddressLeader, OriginalAccess, + DefiningAccess); + // Check if we can apply load coercion. - if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { + if (auto *MemPhi = dyn_cast(DefiningAccess)) { + if (EnableLoadCoercion) + performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(), LI, + nullptr, DefiningAccess); + } else if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast(DefiningAccess)) { Instruction *DefiningInst = MD->getMemoryInst(); // If the defining instruction is not reachable, replace with poison. @@ -1705,7 +1828,7 @@ DefiningInst, DefiningAccess)) return CoercionResult; } - } else if (EnableLoadCoercion) { + } else if (EnableLoadCoercion && !ICF->isDominatedByICFIFromSameBlock(LI)) { // Check if any of the live-in loads can be eliminated with load coercion. for (const auto &U : DefiningAccess->uses()) if (auto *MemUse = dyn_cast(U.getUser())) { @@ -1733,7 +1856,11 @@ continue; // The two loads should be executed in the right order. - if (DependingLoadDomintatesLI) + // TODO: Add analysis that explores more possibilities. 
+ if (DependingLoadDomintatesLI || + (ReachableEdges.count( + {DependingLoad->getParent(), LI->getParent()}) && + PDT->dominates(LI, DependingLoad))) performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(), LI, DependingLoad, getMemoryAccess(DependingLoad)); @@ -1776,6 +1903,9 @@ AdditionallyUsedValue = CmpOp1; } + if (EnableLoadCoercion && LoadCoercion.count(FirstOp)) + return ExprResult::none(); + if (Predicate == CmpInst::ICMP_EQ) return ExprResult::some(createVariableOrConstant(FirstOp), AdditionallyUsedValue, PI); @@ -2955,6 +3085,7 @@ // Clone the instruction, create an expression from it that is // translated back into the predecessor, and see if we have a leader. Instruction *ValueOp = I->clone(); + CurrentPhiOfOpsInsn = ValueOp; if (MemAccess) TempToMemory.insert({ValueOp, MemAccess}); bool SafeForPHIOfOps = true; @@ -3308,10 +3439,12 @@ // If we created a phi of ops, use it. // If we couldn't create one, make sure we don't leave one lying around if (PHIE) { + LoadCoercion.erase(I); Symbolized = PHIE; } else if (auto *Op = RealToTemp.lookup(I)) { removePhiOfOps(I, Op); } + CurrentPhiOfOpsInsn = nullptr; } } else { // Mark the instruction as unused so we don't value number it again. @@ -3627,6 +3760,8 @@ NumFuncArgs = F.arg_size(); MSSAWalker = MSSA->getWalker(); SingletonDeadExpression = new (ExpressionAllocator) DeadExpression(); + ImplicitControlFlowTracking ImplicitCFT; + ICF = &ImplicitCFT; // Count number of instructions for sizing of hash tables, and come // up with a global dfs numbering for instructions. @@ -4095,6 +4230,147 @@ return NewValue; } +// Create the phi node that replaces the load in load coercion. 
+Value *NewGVN::emitLoadCoercionPhi( + LoadInst *LI, BasicBlock *InsertBB, + SmallVectorImpl> + &PredsWithAvailableValue) { + Value *NewValue = nullptr; + SmallVector NewPHIs; + SSAUpdater SSAUpdate(&NewPHIs); + SSAUpdate.Initialize(LI->getType(), LI->getName()); + + for (const auto &P : PredsWithAvailableValue) { + BasicBlock *PredBB = P.first; + Instruction *DepI = P.second; + + if (SSAUpdate.HasValueForBlock(PredBB)) + continue; + + // Get the incoming value for this block. This value is the value that we + // extract from the corresponding depending instruction. + NewValue = getExtractedValue(LI, DepI); + // Match the coerced value with the corresponding incoming block. + SSAUpdate.AddAvailableValue(PredBB, NewValue); + } + + // Generate the phi node. + NewValue = SSAUpdate.GetValueInMiddleOfBlock(InsertBB); + // Run value numbering for the new phi node. + if (Instruction *I = dyn_cast(NewValue)) + runValueNumberingForLoadCoercionInsns(I); + + return NewValue; +} + +// Check if the load can be removed and replace it with a phi node. In Example +// 1, all the predecessors have a depending instruction. Therefore, the load is +// replaced by a phi node whose incoming values are extracted from each +// depending instruction. +// +// Example 1: +// Before Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P store i32 500, i32* %P +// \ / +// BB3: +// %V = load i32, i32* %P +// +// After Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P store i32 500, i32* %P +// \ / +// BB3: +// %phi = phi i32 [ 100, %BB1], [ 500, %BB2 ] +// +// In example 2, there is only one depending instruction in BB1. We eliminate +// the load of BB3 by adding an artificial dependency in BB2. This is done by +// adding a new load (%V1) in BB2. Now, the load of BB3 has two dependencies. 
+// Therefore, we can replace it with a phi node as it is shown below: +// +// Example 2: +// Before Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P | +// \ / +// BB3: +// %V = load i32, i32* %P +// +// After Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P %V1 = load i32, i32* %P +// \ / +// BB3: +// %phi = phi i32 [ 100, %BB1], [ %V2, %BB2 ] +// +Value *NewGVN::tryReplaceLoadWithPhi( + LoadInst *LI, BasicBlock *InsertBB, + SmallVectorImpl> &PhiOperands, + SmallVectorImpl &IncomingBlocksWithoutDep) { + + // If we have found all the phi operands (Example 1), then we are ready to + // replace the load with a phi node. + if (PhiOperands.size() == pred_size(InsertBB)) + return emitLoadCoercionPhi(LI, InsertBB, PhiOperands); + + // If there are more than one predecessors without a depending instruction, + // then we do not perform load coercion. + // TODO: Create a new common predecessor and emit a new load in the common + // predecessor. + if (IncomingBlocksWithoutDep.size() > 1) + return nullptr; + + BasicBlock *IncomingBlock = IncomingBlocksWithoutDep.back(); + // Do not add a new load in EHPad that does not allow non-phi instructions. + if (IncomingBlock->getTerminator()->isEHPad()) + return nullptr; + + // Do not add a new load inside a loop. + // TODO: Create a new basic block between the loop latch and the + // InsertBB. + if (DT->dominates(InsertBB, IncomingBlock)) + return nullptr; + + // TODO: Add support for the case where IncomingBlock has more than one + // successors. + if (succ_size(IncomingBlock) != 1) + return nullptr; + + // Emit a new load in the IncomingBlock and call emitLoadCoercionPhi to + // replace the Li with a new phi node. + Value *LIPtr = LI->getPointerOperand(); + SmallVector NewInsts; + PHITransAddr Address(LIPtr, DL, AC); + // Emits the pointer of the new load if it is not available in the incoming + // block. 
+ LIPtr = + Address.PHITranslateWithInsertion(InsertBB, IncomingBlock, *DT, NewInsts); + if (!LIPtr) + return nullptr; + + // Generate a new load instruction in the incoming block. + auto *NewDependingLoad = new LoadInst( + LI->getType(), LIPtr, LI->getName(), LI->isVolatile(), LI->getAlign(), + LI->getOrdering(), LI->getSyncScopeID(), IncomingBlock->getTerminator()); + + NewInsts.push_back(NewDependingLoad); + + // Update the debug information of the new load. + NewDependingLoad->setDebugLoc(LI->getDebugLoc()); + + // Update MemorySSA with the new load instruction. + updateMemorySSA(LI, NewDependingLoad); + + // Add the basic block in the vector with the basic blocks that have a + // depending instruction. + PhiOperands.push_back(std::make_pair(IncomingBlock, NewDependingLoad)); + for (auto *CurI : NewInsts) + runValueNumberingForLoadCoercionInsns(CurI); + + // Create a phi node. + return emitLoadCoercionPhi(LI, InsertBB, PhiOperands); +} + // Iterate over the load instructions of LoadCoercion map and it replaces // them with the right sequence of instructions. bool NewGVN::implementLoadCoercion() { @@ -4104,11 +4380,98 @@ SmallPtrSet DependingInsns = P.second; Value *NewValue = nullptr; Instruction *FirstDepI = *DependingInsns.begin(); + MemoryAccess *OriginalAccess = getMemoryAccess(LI); + MemoryAccess *DefiningAccess = + MSSAWalker->getClobberingMemoryAccess(OriginalAccess); - if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI)) + // Check whether the load should be replaced with a phi node or we should + // just extract the correct value from the depending instruction. + // + // We do not replace the load with a phi node if there is only one + // depending instruction and it dominates the load as it is shown in the + // following two examples: + // + // BB1: BB1: + // store i32 100, i32* %P store i32 100, i32* %P + // ... 
/ \ + // load i32, i32 *%P BB2: BB3: + // \ / + // BB4: + // load i32, i32* %P + // + if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI) && + !isa(DefiningAccess)) // Extract the correct value from the depending instruction. NewValue = getExtractedValue(LI, FirstDepI); + else { + // Before we replace the load with a phi node, we should find the operands + // of the phi node. In Example 1, it is straightforward that the operands + // of the new phi node are (100, %BB1) and (500, %BB2). + // + // Example 1: + // BB1: BB2: + // store i32 100, i32* %P store i32 500, i32* %P + // \ / + // BB3: + // %V = load i32, i32* %P + // + // However, this is not the case in Example 2. The operands of the new phi + // node should be (100, %BB2) and (%V1, %BB3). Hence, the incoming value + // might not be in an incoming block. But, the incoming value can also be + // in a basic block that dominates the incoming block. + // BB1: + // %V1 = load i32, i32* %P + // / \ + // BB2: BB3: + // store i32 100, i32* %P | + // \ / + // BB4: + // %V2 = load i32, i32* %P + // + // Match the incoming values to the corresponding incoming blocks. + BasicBlock *InsertBB = isa(DefiningAccess) + ? DefiningAccess->getBlock() + : LI->getParent(); + SmallVector, 2> + IncomingBlocksWithDep; + SmallVector IncomingBlocksWithoutDep; + SmallVector LIPredBBs; + for (BasicBlock *PredBB : predecessors(InsertBB)) + LIPredBBs.push_back(PredBB); + SmallPtrSet LIDependingInsns; + for (Instruction *DepI : DependingInsns) + LIDependingInsns.insert(DepI); + // First, we find the incoming values that belong to the predecessors of + // the candidate load. 
+ for (Instruction *DepI : llvm::make_early_inc_range(LIDependingInsns)) { + BasicBlock *DepIBB = DepI->getParent(); + auto It = llvm::find_if(LIPredBBs, [DepIBB](BasicBlock *PredBB) { + return DepIBB == PredBB; + }); + if (It != LIPredBBs.end()) { + IncomingBlocksWithDep.push_back(std::make_pair(DepIBB, DepI)); + LIPredBBs.erase(It); + LIDependingInsns.erase(DepI); + } + } + + // Next, we find the incoming values that belong to basic blocks that + // dominate one of the predecessors of the candidate load. + for (BasicBlock *PredBB : LIPredBBs) { + auto It = + llvm::find_if(LIDependingInsns, [PredBB, this](Instruction *DepI) { + return DT->dominates(DepI->getParent(), PredBB); + }); + + if (It != LIDependingInsns.end()) + IncomingBlocksWithDep.push_back(std::make_pair(PredBB, *It)); + else + IncomingBlocksWithoutDep.push_back(PredBB); + } + NewValue = tryReplaceLoadWithPhi(LI, InsertBB, IncomingBlocksWithDep, + IncomingBlocksWithoutDep); + } // If we could not eliminate the load, then we need to create a load // expression for the load and run value numbering in order to add it in the // correct congruence class. @@ -4121,6 +4484,12 @@ DefiningAccess); valueNumberInstruction(LI); updateProcessedCount(LI); + for (Use &U : LI->uses()) { + if (auto *II = dyn_cast(U.getUser())) { + valueNumberInstruction(II); + updateProcessedCount(II); + } + } continue; } @@ -4133,9 +4502,15 @@ LI->replaceAllUsesWith(NewValue); // Run value numbering for the uses of the load after updating them with the // new value. In this way, we might be able to eliminate them. - for (Instruction *User : LIUses) { - valueNumberInstruction(User); - updateProcessedCount(User); + for (Instruction *I : LIUses) { + valueNumberInstruction(I); + updateProcessedCount(I); + for (Use &U : I->uses()) { + if (auto *II = dyn_cast(U.getUser())) { + valueNumberInstruction(II); + updateProcessedCount(II); + } + } } // Update the name of the phi node if we generated one. 
if (isa(NewValue)) @@ -4148,7 +4523,6 @@ << *NewValue << "\n"); AnythingReplaced = true; } - return AnythingReplaced; } @@ -4568,6 +4942,8 @@ AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } }; @@ -4577,6 +4953,7 @@ if (skipFunction(F)) return false; return NewGVN(F, &getAnalysis().getDomTree(), + &getAnalysis().getPostDomTree(), &getAnalysis().getAssumptionCache(F), &getAnalysis().getTLI(F), &getAnalysis().getAAResults(), @@ -4592,6 +4969,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) @@ -4607,12 +4985,13 @@ // the same order here, just in case. auto &AC = AM.getResult(F); auto &DT = AM.getResult(F); + auto &PDT = AM.getResult(F); auto &TLI = AM.getResult(F); auto &AA = AM.getResult(F); auto &MSSA = AM.getResult(F).getMSSA(); - bool Changed = - NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getParent()->getDataLayout()) - .runGVN(); + bool Changed = NewGVN(F, &DT, &PDT, &AC, &TLI, &AA, &MSSA, + F.getParent()->getDataLayout()) + .runGVN(); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll b/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll --- a/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll +++ b/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll @@ -248,7 +248,7 @@ define i8 @irreducible_memoryphi(i8* noalias %arg, i8* noalias %arg2) { ; CHECK-LABEL: @irreducible_memoryphi( ; CHECK-NEXT: bb: -; CHECK-NEXT: store i8 0, i8* [[ARG:%.*]] +; CHECK-NEXT: store i8 0, i8* [[ARG:%.*]], align 1 ; CHECK-NEXT: br i1 undef, label [[BB2:%.*]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB2]] diff --git 
a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll @@ -10,31 +10,18 @@ ; vv ; Exit ; -; OLDGVN-LABEL: @test1( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; OLDGVN: T: -; OLDGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br label [[EXIT:%.*]] -; OLDGVN: F: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[T]] ] -; OLDGVN-NEXT: ret i32 [[V2]] -; -; NEWGVN-LABEL: @test1( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; NEWGVN: T: -; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br label [[EXIT:%.*]] -; NEWGVN: F: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: Exit: -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: ret i32 [[V2]] +; GVN-LABEL: @test1( +; GVN-NEXT: Entry: +; GVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; GVN: T: +; GVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 +; GVN-NEXT: br label [[EXIT:%.*]] +; GVN: F: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[T]] ] +; GVN-NEXT: ret i32 [[V2]] ; Entry: br i1 %Cond, label %T, label %F @@ -87,11 +74,10 @@ ; NEWGVN-NEXT: store float 1.000000e+00, float* [[P1]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ] +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i8 [ 0, [[F]] ], [ 84, [[T]] ] +; NEWGVN-NEXT: [[V2:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ] ; 
NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[V3:%.*]] = add i8 [[V2]], [[PHI]] -; NEWGVN-NEXT: ret i8 [[V3]] +; NEWGVN-NEXT: ret i8 [[PHIOFOPS]] ; Entry: %P1 = bitcast i32* %P to float* @@ -145,9 +131,10 @@ ; NEWGVN-NEXT: store i32 42, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V41:%.*]] = load i8, i8* [[P4]], align 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[V4:%.*]] = load i8, i8* [[P4]], align 1 +; NEWGVN-NEXT: [[V4:%.*]] = phi i8 [ [[V41]], [[F]] ], [ 0, [[T]] ] ; NEWGVN-NEXT: ret i8 [[V4]] ; Entry: @@ -260,10 +247,11 @@ ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: F2: +; NEWGVN-NEXT: [[V11:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V1:%.*]] = phi i32 [ 42, [[T]] ], [ 13, [[F1]] ], [ [[V11]], [[F2]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 1, [[T]] ], [ 2, [[F1]] ], [ 3, [[F2]] ] -; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V2:%.*]] = add i32 [[PHI]], [[V1]] ; NEWGVN-NEXT: ret i32 [[V2]] ; @@ -300,27 +288,16 @@ ; v v ; Exit ; -; OLDGVN-LABEL: @test6( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] -; OLDGVN: Loop: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V:%.*]] = phi i32 [ 13, [[LOOP]] ], [ 42, [[ENTRY:%.*]] ] -; OLDGVN-NEXT: ret i32 [[V]] -; -; NEWGVN-LABEL: @test6( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] -; NEWGVN: Loop: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]] -; NEWGVN: 
Exit: -; NEWGVN-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: ret i32 [[V]] +; GVN-LABEL: @test6( +; GVN-NEXT: Entry: +; GVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 +; GVN-NEXT: br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] +; GVN: Loop: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V:%.*]] = phi i32 [ 13, [[LOOP]] ], [ 42, [[ENTRY:%.*]] ] +; GVN-NEXT: ret i32 [[V]] ; Entry: store i32 42, i32* %P @@ -376,12 +353,12 @@ ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[INNER_LOOP]], label [[OUTER_LOOP_LATCH]] ; NEWGVN: Outer.Loop.Latch: +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 242, [[OUTER_LOOP]] ], [ 113, [[INNER_LOOP]] ] +; NEWGVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[INNER_LOOP]] ], [ 42, [[OUTER_LOOP]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 100, [[INNER_LOOP]] ], [ 200, [[OUTER_LOOP]] ] -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: [[V3:%.*]] = add i32 [[V2]], [[PHI]] ; NEWGVN-NEXT: br i1 [[COND3:%.*]], label [[OUTER_LOOP]], label [[EXIT:%.*]] ; NEWGVN: Exit: -; NEWGVN-NEXT: ret i32 [[V3]] +; NEWGVN-NEXT: ret i32 [[PHIOFOPS]] ; Entry: br label %Outer.Loop @@ -440,14 +417,17 @@ ; NEWGVN: T: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i32> [[V1]], i64 1 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[P2_PHI_TRANS_INSERT:%.*]] = bitcast i8* [[P]] to i32* +; NEWGVN-NEXT: [[V21:%.*]] = load i32, i32* [[P2_PHI_TRANS_INSERT]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V2:%.*]] = phi i32 [ [[V21]], [[F]] ], [ [[TMP1]], [[T]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 
[ 100, [[F]] ], [ [[V3]], [[T]] ] -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 ; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[V2]], [[PHI]] ; NEWGVN-NEXT: ret i32 [[V4]] ; @@ -503,15 +483,18 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: +; NEWGVN-NEXT: [[V41:%.*]] = load i32, i32* [[P:%.*]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>* +; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to <2 x i32>* ; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V4:%.*]] = phi i32 [ [[V41]], [[T]] ], [ [[TMP1]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[T]] ], [ [[V3]], [[F]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[V4]], [[PHI]] ; NEWGVN-NEXT: ret i32 [[V5]] ; @@ -573,11 +556,13 @@ ; NEWGVN: F: ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to <2 x i32>* ; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V4:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[TMP1]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[V3]], [[F]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[PHI]] ; NEWGVN-NEXT: [[V6:%.*]] = add i32 [[V5]], [[V4]] ; NEWGVN-NEXT: ret i32 [[V6]] @@ -731,14 +716,16 @@ ; NEWGVN: T: ; NEWGVN-NEXT: 
[[P1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V3:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ 13, [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[PHI]], [[V3]] ; NEWGVN-NEXT: ret i32 [[V4]] ; @@ -796,10 +783,8 @@ ; NEWGVN-NEXT: store i32 100, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 13, [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[PHI]], [[V3]] -; NEWGVN-NEXT: ret i32 [[V4]] +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 200, [[F]] ], [ 26, [[T]] ] +; NEWGVN-NEXT: ret i32 [[PHIOFOPS]] ; Entry: %P1 = bitcast i32* %P to <4 x i32>* @@ -830,27 +815,16 @@ ; v v ; Exit ; -; OLDGVN-LABEL: @test15( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] -; OLDGVN: BB: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V1:%.*]] = phi i32 [ 13, [[BB]] ], [ 42, [[ENTRY:%.*]] ] -; OLDGVN-NEXT: ret i32 [[V1]] -; -; NEWGVN-LABEL: @test15( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] -; NEWGVN: BB: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: Exit: -; NEWGVN-NEXT: [[V1:%.*]] = load i32, 
i32* [[P]], align 4 -; NEWGVN-NEXT: ret i32 [[V1]] +; GVN-LABEL: @test15( +; GVN-NEXT: Entry: +; GVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 +; GVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] +; GVN: BB: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V1:%.*]] = phi i32 [ 13, [[BB]] ], [ 42, [[ENTRY:%.*]] ] +; GVN-NEXT: ret i32 [[V1]] ; Entry: store i32 42, i32* %P @@ -894,14 +868,16 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] ; NEWGVN: BB: ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY:%.*]] ], [ 100, [[BB]] ] -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[V3:%.*]] = phi i32 [ [[TMP1]], [[ENTRY:%.*]] ], [ 13, [[BB]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY]] ], [ 100, [[BB]] ] ; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[PHI]], [[V3]] ; NEWGVN-NEXT: ret i32 [[V4]] ; @@ -1010,6 +986,8 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: @@ -1019,8 +997,8 @@ ; NEWGVN-NEXT: store i32 100, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V4:%.*]] = phi i32 [ 
[[TMP1]], [[T]] ], [ 100, [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 13, [[F]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] ; NEWGVN-NEXT: ret i32 [[V5]] ; @@ -1082,10 +1060,10 @@ ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: ; NEWGVN-NEXT: store i32 [[V:%.*]], i32* [[P]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = trunc i32 [[V]] to i16 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i16* -; NEWGVN-NEXT: [[V2:%.*]] = load i16, i16* [[P2]], align 2 +; NEWGVN-NEXT: [[V2:%.*]] = phi i16 [ [[TMP0]], [[F]] ], [ 13, [[T]] ] ; NEWGVN-NEXT: ret i16 [[V2]] ; Entry: @@ -1208,9 +1186,10 @@ ; NEWGVN-NEXT: store i32 42, i32* [[P1:%.*]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V11:%.*]] = load i32, i32* [[P1]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1]], align 4 +; NEWGVN-NEXT: [[V1:%.*]] = phi i32 [ [[V11]], [[F]] ], [ 42, [[T]] ] ; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2:%.*]], align 4 ; NEWGVN-NEXT: [[V3:%.*]] = add i32 [[V1]], [[V2]] ; NEWGVN-NEXT: ret i32 [[V3]] @@ -1262,13 +1241,15 @@ ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: ; NEWGVN-NEXT: store i32 42, i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[V23:%.*]] = load i32, i32* [[P2:%.*]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: -; NEWGVN-NEXT: store i32 13, i32* [[P2:%.*]], align 4 +; NEWGVN-NEXT: store i32 13, i32* [[P2]], align 4 +; NEWGVN-NEXT: [[V11:%.*]] = load i32, i32* [[P1]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1]], align 4 -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 +; NEWGVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V23]], [[T]] ] +; NEWGVN-NEXT: [[V1:%.*]] = phi i32 [ [[V11]], [[F]] ], [ 42, [[T]] ] ; NEWGVN-NEXT: [[V3:%.*]] = 
add i32 [[V1]], [[V2]] ; NEWGVN-NEXT: ret i32 [[V3]] ; @@ -1384,11 +1365,12 @@ ; NEWGVN: F: ; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]] ; NEWGVN: F1: +; NEWGVN-NEXT: [[V31:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[BB]] ; NEWGVN: F2: ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: BB: -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[V3:%.*]] = phi i32 [ [[V31]], [[F1]] ], [ 42, [[T]] ] ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V3]], [[BB]] ], [ 100, [[F2]] ] @@ -1457,14 +1439,17 @@ ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = lshr i32 [[V1]], 16 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[P2_PHI_TRANS_INSERT:%.*]] = bitcast i32* [[P1]] to i8* +; NEWGVN-NEXT: [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, i8* [[P2_PHI_TRANS_INSERT]], i32 2 +; NEWGVN-NEXT: [[V21:%.*]] = load i8, i8* [[GEP_PHI_TRANS_INSERT]], align 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V2:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ [[V21]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ 0, [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[P2]], i32 2 -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[GEP]], align 1 ; NEWGVN-NEXT: [[V3:%.*]] = add i8 [[PHI]], [[V2]] ; NEWGVN-NEXT: ret i8 [[V3]] ; @@ -1522,16 +1507,17 @@ ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4 -; NEWGVN-NEXT: [[V2:%.*]] = lshr i32 [[V1]], 16 -; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V2]] to i8 +; NEWGVN-NEXT: [[TMP0:%.*]] = lshr i32 [[V1]], 16 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 ; 
NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[P2_PHI_TRANS_INSERT:%.*]] = bitcast i32* [[P1]] to i8* +; NEWGVN-NEXT: [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, i8* [[P2_PHI_TRANS_INSERT]], i32 2 +; NEWGVN-NEXT: [[V41:%.*]] = load i8, i8* [[GEP_PHI_TRANS_INSERT]], align 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ [[V3]], [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[P2]], i32 2 -; NEWGVN-NEXT: [[V4:%.*]] = load i8, i8* [[GEP]], align 1 +; NEWGVN-NEXT: [[V4:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ [[V41]], [[F]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ 100, [[F]] ] ; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[PHI]], [[V4]] ; NEWGVN-NEXT: ret i8 [[V5]] ; @@ -1667,47 +1653,26 @@ ; v v v ; Exit ; -; OLDGVN-LABEL: @test28( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]] -; OLDGVN: T: -; OLDGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; OLDGVN: F: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br label [[EXIT:%.*]] -; OLDGVN: BB1: -; OLDGVN-NEXT: [[V2:%.*]] = add i32 [[V1]], 100 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: BB2: -; OLDGVN-NEXT: [[V3:%.*]] = add i32 [[V1]], 13 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V4:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[BB1]] ], [ [[V1]], [[BB2]] ] -; OLDGVN-NEXT: [[PHI:%.*]] = phi i32 [ 10, [[F]] ], [ [[V2]], [[BB1]] ], [ [[V3]], [[BB2]] ] -; OLDGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] -; OLDGVN-NEXT: ret i32 [[V5]] -; -; NEWGVN-LABEL: @test28( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]] -; NEWGVN: T: -; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] 
-; NEWGVN: F: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br label [[EXIT:%.*]] -; NEWGVN: BB1: -; NEWGVN-NEXT: [[V2:%.*]] = add i32 [[V1]], 100 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: BB2: -; NEWGVN-NEXT: [[V3:%.*]] = add i32 [[V1]], 13 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 10, [[F]] ], [ [[V2]], [[BB1]] ], [ [[V3]], [[BB2]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] -; NEWGVN-NEXT: ret i32 [[V5]] +; GVN-LABEL: @test28( +; GVN-NEXT: Entry: +; GVN-NEXT: br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]] +; GVN: T: +; GVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 +; GVN-NEXT: br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; GVN: F: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br label [[EXIT:%.*]] +; GVN: BB1: +; GVN-NEXT: [[V2:%.*]] = add i32 [[V1]], 100 +; GVN-NEXT: br label [[EXIT]] +; GVN: BB2: +; GVN-NEXT: [[V3:%.*]] = add i32 [[V1]], 13 +; GVN-NEXT: br label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V4:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[BB1]] ], [ [[V1]], [[BB2]] ] +; GVN-NEXT: [[PHI:%.*]] = phi i32 [ 10, [[F]] ], [ [[V2]], [[BB1]] ], [ [[V3]], [[BB2]] ] +; GVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] +; GVN-NEXT: ret i32 [[V5]] ; Entry: br i1 %Cond1, label %T, label %F @@ -2078,10 +2043,11 @@ ; NEWGVN-NEXT: store i64 [[INDEX]], i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V11:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: Loop.Latch: +; NEWGVN-NEXT: [[V1:%.*]] = phi i64 [ [[INDEX]], [[T]] ], [ [[V11]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i64 [ 100, [[T]] ], [ 50, [[F]] ] -; NEWGVN-NEXT: [[V1:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: [[V2:%.*]] = add i64 [[V1]], [[PHI]] ; NEWGVN-NEXT: [[INDEX_INC]] = add i64 [[INDEX]], 1 ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i64 
[[INDEX_INC]], [[TC:%.*]] @@ -2260,10 +2226,11 @@ ; NEWGVN-NEXT: [[V1:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V21:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: Loop.Latch: +; NEWGVN-NEXT: [[V2:%.*]] = phi i64 [ [[V1]], [[T]] ], [ [[V21]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i64 [ [[V1]], [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[V2:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: [[V3:%.*]] = add i64 [[PHI]], [[V2]] ; NEWGVN-NEXT: [[INDEX_INC]] = add i64 [[INDEX]], 1 ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]] @@ -2364,6 +2331,7 @@ ret i64 %V4 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test39(i64* %P, i1 %Cond1, i64 %TC) { ; Entry ; | @@ -2458,6 +2426,7 @@ ret i64 %V5 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test40(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -2981,6 +2950,7 @@ ret i64 %V9 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test45(i64* %P, i64 %TC1, i1 %Cond1, i64 %TC2) { ; Entry _ ; | / | @@ -3105,6 +3075,7 @@ ret i64 %V9 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test46(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -3170,6 +3141,7 @@ ret i64 %V5 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test47(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -3236,6 +3208,7 @@ ret i64 %V5 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test48(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -3389,6 +3362,7 @@ ret void } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test50(i64* %P, i64 %TC1, i1 %Cond1, i64 %TC2) { ; Entry _ ; | / | @@ -3516,6 +3490,7 @@ ret i64 %V9 } +; TODO: Add support for partial redundant load elimination for loops. 
define i64 @test51(i64* %P, i64 %TC, i1 %Cond) { ; Entry ; | @@ -3714,6 +3689,7 @@ ret i64 %V3 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test53(i64* %P, i64 %TC, i1 %Cond1) { ; Entry ; | @@ -3768,11 +3744,14 @@ ; NEWGVN: BB1: ; NEWGVN-NEXT: [[I1:%.*]] = insertelement <4 x i64> [[V1]], i64 [[INDEX]], i32 1 ; NEWGVN-NEXT: store <4 x i64> [[I1]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I1]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64 ; NEWGVN-NEXT: br label [[BB3:%.*]] ; NEWGVN: BB2: +; NEWGVN-NEXT: [[V21:%.*]] = load i64, i64* [[P]], align 4 ; NEWGVN-NEXT: br label [[BB3]] ; NEWGVN: BB3: -; NEWGVN-NEXT: [[V2:%.*]] = load i64, i64* [[P]], align 4 +; NEWGVN-NEXT: [[V2:%.*]] = phi i64 [ [[V21]], [[BB2]] ], [ [[TMP1]], [[BB1]] ] ; NEWGVN-NEXT: [[V3:%.*]] = add i64 [[V2]], [[INDEX]] ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: Loop.Latch: @@ -3854,14 +3833,14 @@ ; NEWGVN: BB2: ; NEWGVN-NEXT: br label [[BB3]] ; NEWGVN: BB3: +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 10, [[BB2]] ], [ 15, [[BB1]] ] ; NEWGVN-NEXT: [[PHI1:%.*]] = phi i32 [ 10, [[BB1]] ], [ 5, [[BB2]] ] ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i32 [[V1:%.*]], 0 ; NEWGVN-NEXT: br i1 [[COND2]], label [[BB4:%.*]], label [[EXIT:%.*]] ; NEWGVN: BB4: -; NEWGVN-NEXT: [[V6:%.*]] = add nsw i32 [[PHI1]], 5 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[BB3]] ], [ [[V6]], [[BB4]] ] +; NEWGVN-NEXT: [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[BB3]] ], [ [[PHIOFOPS]], [[BB4]] ] ; NEWGVN-NEXT: ret i32 [[PHI2]] ; Entry: @@ -3936,8 +3915,7 @@ ; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ [[TMP3]], [[F]] ] -; NEWGVN-NEXT: [[V5:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[V5:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ [[TMP3]], [[F]] ] ; NEWGVN-NEXT: 
ret i32 [[V5]] ; Entry: @@ -4048,4 +4026,80 @@ ret i32 %V4 } +define internal fastcc i32 @test58() { +; Entry +; | +; BB1 +; / | +; BB2 | +; \ | +; BB3 +; / | +; BB4 | +; \ | +; v v +; Exit +; +; OLDGVN-LABEL: @test58( +; OLDGVN-NEXT: Entry: +; OLDGVN-NEXT: store i32 0, i32* null, align 8 +; OLDGVN-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] +; OLDGVN: BB2: +; OLDGVN-NEXT: store i32 0, i32* null, align 8 +; OLDGVN-NEXT: br label [[BB3]] +; OLDGVN: BB3: +; OLDGVN-NEXT: br i1 false, label [[BB4:%.*]], label [[EXIT:%.*]] +; OLDGVN: BB4: +; OLDGVN-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 0 to i64 +; OLDGVN-NEXT: br label [[EXIT]] +; OLDGVN: Exit: +; OLDGVN-NEXT: ret i32 0 +; +; NEWGVN-LABEL: @test58( +; NEWGVN-NEXT: Entry: +; NEWGVN-NEXT: store i32 0, i32* null, align 8 +; NEWGVN-NEXT: br label [[BB1:%.*]] +; NEWGVN: BB1: +; NEWGVN-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] +; NEWGVN: BB2: +; NEWGVN-NEXT: store i8 poison, i8* null, align 1 +; NEWGVN-NEXT: br label [[BB3]] +; NEWGVN: BB3: +; NEWGVN-NEXT: [[V2:%.*]] = tail call i32 @llvm.smax.i32(i32 0, i32 0) +; NEWGVN-NEXT: [[COND:%.*]] = icmp sgt i32 [[V2]], 0 +; NEWGVN-NEXT: br i1 [[COND]], label [[BB4:%.*]], label [[EXIT:%.*]] +; NEWGVN: BB4: +; NEWGVN-NEXT: br label [[EXIT]] +; NEWGVN: Exit: +; NEWGVN-NEXT: ret i32 0 +; +Entry: + store i32 0, i32* null, align 8 + br label %BB1 + +BB1: + br i1 false, label %BB2, label %BB3 + +BB2: + store i32 0, i32* null, align 8 + br label %BB3 + +BB3: + %V1 = load i32, i32* null, align 4 + %V2 = tail call i32 @llvm.smax.i32(i32 %V1, i32 0) + %Cond = icmp sgt i32 %V2, 0 + br i1 %Cond, label %BB4, label %Exit + +BB4: + %wide.trip.count = zext i32 %V2 to i64 + br label %Exit + +Exit: + ret i32 0 +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i32 @llvm.smax.i32(i32, i32) #1 + attributes #0 = { readnone } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git 
a/llvm/test/Transforms/NewGVN/pr31483.ll b/llvm/test/Transforms/NewGVN/pr31483.ll --- a/llvm/test/Transforms/NewGVN/pr31483.ll +++ b/llvm/test/Transforms/NewGVN/pr31483.ll @@ -10,20 +10,20 @@ ; CHECK-LABEL: @ham( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: store i8* %arg1, i8** [[TMP]], align 8 -; CHECK-NEXT: br label %bb2 +; CHECK-NEXT: store i8* [[ARG1:%.*]], i8** [[TMP]], align 8 +; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP3:%.*]] = phi i8* [ %arg, %bb ], [ %tmp7, %bb22 ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i8* [ [[ARG:%.*]], [[BB:%.*]] ], [ [[TMP7:%.*]], [[BB22:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[TMP3]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %bb6, label %bb23 +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB23:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7]] = getelementptr inbounds i8, i8* [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP4]] to i32 -; CHECK-NEXT: switch i32 [[TMP9]], label %bb22 [ -; CHECK-NEXT: i32 115, label %bb10 -; CHECK-NEXT: i32 105, label %bb16 -; CHECK-NEXT: i32 99, label %bb16 +; CHECK-NEXT: switch i32 [[TMP9]], label [[BB22]] [ +; CHECK-NEXT: i32 115, label [[BB10:%.*]] +; CHECK-NEXT: i32 105, label [[BB16:%.*]] +; CHECK-NEXT: i32 99, label [[BB16]] ; CHECK-NEXT: ] ; CHECK: bb10: ; CHECK-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP]], align 8 @@ -32,16 +32,16 @@ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i8** ; CHECK-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 ; CHECK-NEXT: [[TMP15:%.*]] = call signext i32 (i8*, ...) 
@zot(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @global, i32 0, i32 0), i8* [[TMP14]]) -; CHECK-NEXT: br label %bb22 +; CHECK-NEXT: br label [[BB22]] ; CHECK: bb16: ; CHECK-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP]], align 8 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 8 ; CHECK-NEXT: store i8* [[TMP18]], i8** [[TMP]], align 8 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 4 ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* -; CHECK-NEXT: br label %bb22 +; CHECK-NEXT: br label [[BB22]] ; CHECK: bb22: -; CHECK-NEXT: br label %bb2 +; CHECK-NEXT: br label [[BB2]] ; CHECK: bb23: ; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.va_end(i8* [[TMP24]]) diff --git a/llvm/test/Transforms/NewGVN/pr31613.ll b/llvm/test/Transforms/NewGVN/pr31613.ll --- a/llvm/test/Transforms/NewGVN/pr31613.ll +++ b/llvm/test/Transforms/NewGVN/pr31613.ll @@ -73,7 +73,7 @@ define void @e(i32 %a0, i32 %a1, %struct.a** %p2) { ; CHECK-LABEL: @e( -; CHECK-NEXT: [[F:%.*]] = alloca i32 +; CHECK-NEXT: [[F:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A0:%.*]], i32* [[F]], align 4, !g !0 ; CHECK-NEXT: br label [[H:%.*]] ; CHECK: h: diff --git a/llvm/test/Transforms/NewGVN/pr32836.ll b/llvm/test/Transforms/NewGVN/pr32836.ll --- a/llvm/test/Transforms/NewGVN/pr32836.ll +++ b/llvm/test/Transforms/NewGVN/pr32836.ll @@ -5,21 +5,23 @@ @b = external global %struct.anon define void @tinkywinky(i1 %patatino) { ; CHECK-LABEL: @tinkywinky( -; CHECK-NEXT: store i32 8, i32* null +; CHECK-NEXT: store i32 8, i32* null, align 4 ; CHECK-NEXT: br i1 [[PATATINO:%.*]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: br label [[L:%.*]] ; CHECK: L: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* null, align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* null -; CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, i32* getelementptr 
inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP11]], [[L]] ], [ 8, [[TMP0:%.*]] ] +; CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_ANON:%.*]], %struct.anon* @b, i64 0, i32 0), align 4 ; CHECK-NEXT: [[BF_VALUE:%.*]] = and i32 [[TMP1]], 536870911 ; CHECK-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -536870912 ; CHECK-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -; CHECK-NEXT: store i32 [[BF_SET]], i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0) +; CHECK-NEXT: store i32 [[BF_SET]], i32* getelementptr inbounds ([[STRUCT_ANON]], %struct.anon* @b, i64 0, i32 0), align 4 ; CHECK-NEXT: br label [[LOR_END:%.*]] ; CHECK: lor.end: +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[BF_SET]], 536870911 ; CHECK-NEXT: br label [[L]] ; store i32 8, i32* null diff --git a/llvm/test/Transforms/NewGVN/pr32934.ll b/llvm/test/Transforms/NewGVN/pr32934.ll --- a/llvm/test/Transforms/NewGVN/pr32934.ll +++ b/llvm/test/Transforms/NewGVN/pr32934.ll @@ -1,39 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=newgvn %s | FileCheck %s -; CHECK: define void @tinkywinky() { -; CHECK-NEXT: entry: -; CHECK-NEXT: %d = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* null, align 4 -; CHECK-NEXT: br label %for.cond -; CHECK: for.cond: ; preds = %if.end, %entry -; CHECK-NEXT: %0 = load i32, i32* null, align 4 -; CHECK-NEXT: %cmp = icmp slt i32 %0, 1 -; CHECK-NEXT: br i1 %cmp, label %for.body, label %while.cond -; CHECK: for.body: ; preds = %for.cond -; CHECK-NEXT: %1 = load i32, i32* @a, align 4 -; CHECK-NEXT: store i32 %1, i32* %d, align 4 -; CHECK-NEXT: br label %L -; CHECK: L: ; preds = %if.then, %for.body -; CHECK-NEXT: %tobool = icmp ne i32 %1, 0 -; CHECK-NEXT: br i1 %tobool, label %if.then, label %if.end -; CHECK: if.then: ; preds = %L -; CHECK-NEXT: call void (i8*, ...) 
@printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) -; CHECK-NEXT: br label %L -; CHECK: if.end: ; preds = %L -; CHECK-NEXT: br label %for.cond -; CHECK: while.cond: ; preds = %while.body, %for.cond -; CHECK-NEXT: br i1 undef, label %while.body, label %while.end -; CHECK: while.body: ; preds = %while.cond -; CHECK-NEXT: call void (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) -; CHECK-NEXT: br label %while.cond -; CHECK: while.end: -; CHECK-NEXT: %2 = load i32, i32* @a, align 4 -; CHECK-NEXT: store i32 %2, i32* undef, align 4 -; CHECK-NEXT: ret void - @a = external global i32, align 4 @patatino = external unnamed_addr constant [2 x i8], align 1 define void @tinkywinky() { +; CHECK-LABEL: @tinkywinky( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 0, i32* null, align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[WHILE_COND:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[D]], align 4 +; CHECK-NEXT: br label [[L:%.*]] +; CHECK: L: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) +; CHECK-NEXT: br label [[L]] +; CHECK: if.end: +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: while.cond: +; CHECK-NEXT: br i1 undef, label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.body: +; CHECK-NEXT: call void (i8*, ...) 
@printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) +; CHECK-NEXT: br label [[WHILE_COND]] +; CHECK: while.end: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: store i32 [[TMP2]], i32* undef, align 4 +; CHECK-NEXT: ret void +; entry: %d = alloca i32, align 4 store i32 0, i32* null, align 4 diff --git a/llvm/test/Transforms/NewGVN/pr35125.ll b/llvm/test/Transforms/NewGVN/pr35125.ll --- a/llvm/test/Transforms/NewGVN/pr35125.ll +++ b/llvm/test/Transforms/NewGVN/pr35125.ll @@ -29,7 +29,7 @@ ; CHECK-NEXT: store i32 [[TMP1]], i32* @a, align 4 ; CHECK-NEXT: br label [[IF_END6]] ; CHECK: if.end6: -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[LOR_END]] ], [ [[TMP0]], [[IF_END]] ] ; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 [[TMP2]]) ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/NewGVN/refine-stores.ll b/llvm/test/Transforms/NewGVN/refine-stores.ll --- a/llvm/test/Transforms/NewGVN/refine-stores.ll +++ b/llvm/test/Transforms/NewGVN/refine-stores.ll @@ -15,7 +15,7 @@ ; CHECK-LABEL: @spam( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[FOO:%.*]] = bitcast i32* [[A:%.*]] to %struct.eggs** -; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]] +; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]], align 8 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB2:%.*]] @@ -23,8 +23,8 @@ ; CHECK-NEXT: call void @baz() ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* undef -; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]] +; CHECK-NEXT: store i32 0, i32* undef, align 4 +; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]], align 8 ; CHECK-NEXT: unreachable ; bb: @@ -58,11 +58,11 @@ ; CHECK-NEXT: b: ; CHECK-NEXT: br label [[C:%.*]] ; CHECK: c: -; CHECK-NEXT: 
store i64 undef, i64* null +; CHECK-NEXT: store i64 undef, i64* null, align 4 ; CHECK-NEXT: br label [[E:%.*]] ; CHECK: e: -; CHECK-NEXT: [[G:%.*]] = load i64*, i64** null -; CHECK-NEXT: store i64* undef, i64** null +; CHECK-NEXT: [[G:%.*]] = load i64*, i64** null, align 8 +; CHECK-NEXT: store i64* undef, i64** null, align 8 ; CHECK-NEXT: br i1 undef, label [[C]], label [[E]] ; b: @@ -90,16 +90,16 @@ ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP:%.*]] = phi %struct.hoge* [ [[ARG:%.*]], [[BB:%.*]] ], [ null, [[BB1]] ] -; CHECK-NEXT: store %struct.hoge* [[TMP]], %struct.hoge** undef +; CHECK-NEXT: store %struct.hoge* [[TMP]], %struct.hoge** undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]] ; CHECK: bb2: ; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP8:%.*]], [[BB7:%.*]] ], [ 0, [[BB1]] ] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[BB7]], label [[BB5:%.*]] ; CHECK: bb5: -; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* null +; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* null, align 4 ; CHECK-NEXT: call void @quux() -; CHECK-NEXT: store i64 [[TMP6]], i64* undef +; CHECK-NEXT: store i64 [[TMP6]], i64* undef, align 4 ; CHECK-NEXT: br label [[BB7]] ; CHECK: bb7: ; CHECK-NEXT: [[TMP8]] = add i64 [[TMP3]], 1 @@ -137,14 +137,14 @@ define void @b() { ; CHECK-LABEL: @b( -; CHECK-NEXT: [[C:%.*]] = alloca [[STRUCT_A:%.*]] +; CHECK-NEXT: [[C:%.*]] = alloca [[STRUCT_A:%.*]], align 8 ; CHECK-NEXT: br label [[D:%.*]] ; CHECK: m: ; CHECK-NEXT: unreachable ; CHECK: d: ; CHECK-NEXT: [[G:%.*]] = bitcast %struct.a* [[C]] to i8* ; CHECK-NEXT: [[F:%.*]] = bitcast i8* [[G]] to i32* -; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[F]] +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[F]], align 4 ; CHECK-NEXT: br i1 undef, label [[I:%.*]], label [[J:%.*]] ; CHECK: i: ; CHECK-NEXT: br i1 undef, label [[K:%.*]], label [[M:%.*]] diff --git a/llvm/test/Transforms/NewGVN/rle-nonlocal.ll 
b/llvm/test/Transforms/NewGVN/rle-nonlocal.ll --- a/llvm/test/Transforms/NewGVN/rle-nonlocal.ll +++ b/llvm/test/Transforms/NewGVN/rle-nonlocal.ll @@ -7,14 +7,14 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]] ; CHECK: block2: -; CHECK-NEXT: [[A:%.*]] = load i32*, i32** [[P:%.*]] +; CHECK-NEXT: [[A:%.*]] = load i32*, i32** [[P:%.*]], align 8 ; CHECK-NEXT: br label [[BLOCK4:%.*]] ; CHECK: block3: -; CHECK-NEXT: [[B:%.*]] = load i32*, i32** [[P]] +; CHECK-NEXT: [[B:%.*]] = load i32*, i32** [[P]], align 8 ; CHECK-NEXT: br label [[BLOCK4]] ; CHECK: block4: -; CHECK-NEXT: [[EXISTINGPHI:%.*]] = phi i32* [ [[A]], [[BLOCK2]] ], [ [[B]], [[BLOCK3]] ] -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[EXISTINGPHI]] +; CHECK-NEXT: [[DEAD:%.*]] = phi i32* [ [[A]], [[BLOCK2]] ], [ [[B]], [[BLOCK3]] ] +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[DEAD]], align 4 ; CHECK-NEXT: [[E:%.*]] = add i32 [[C]], [[C]] ; CHECK-NEXT: ret i32 [[E]] ; diff --git a/llvm/test/Transforms/NewGVN/storeoverstore.ll b/llvm/test/Transforms/NewGVN/storeoverstore.ll --- a/llvm/test/Transforms/NewGVN/storeoverstore.ll +++ b/llvm/test/Transforms/NewGVN/storeoverstore.ll @@ -61,13 +61,13 @@ ; CHECK: 5: ; CHECK-NEXT: br label [[TMP6]] ; CHECK: 6: +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 10, [[TMP5]] ], [ 15, [[TMP4]] ] ; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP9:%.*]] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP8:%.*]] ; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[DOT0]], 5 -; CHECK-NEXT: br label [[TMP9]] -; CHECK: 9: -; CHECK-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ] +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[DOT1:%.*]] = phi i32 [ [[PHIOFOPS]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ] ; CHECK-NEXT: ret i32 [[DOT1]] ; store i32 5, i32* %0, align 4