diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -76,6 +76,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -157,6 +158,10 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true), cl::Hidden); +// Enables load coercion for non-constant values. +static cl::opt<bool> EnableLoadCoercion("enable-load-coercion", cl::init(true), + cl::Hidden); + //===----------------------------------------------------------------------===// // GVN Pass //===----------------------------------------------------------------------===// @@ -656,6 +661,13 @@ // Deletion info. SmallPtrSet<Instruction *, 8> InstructionsToErase; + // Maps the loads to their depending instructions. + std::map<LoadInst *, SmallPtrSet<Instruction *, 2>> LoadCoercion; + + // During load coercion, new loads might be generated. We do not want to apply + // load coercion for them for now. + SmallVector<LoadInst *, 2> NewLoadsInLoadCoercion; + public: NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, @@ -782,9 +794,9 @@ ExprResult checkExprResults(Expression *, Instruction *, Value *) const; ExprResult performSymbolicEvaluation(Value *, SmallPtrSetImpl<Value *> &) const; - const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, - Instruction *, - MemoryAccess *) const; + const Expression *createLoadExpAndUpdateMemUses(LoadInst *, Value *, + MemoryAccess *, + MemoryAccess *) const; const Expression *performSymbolicLoadEvaluation(Instruction *) const; const Expression *performSymbolicStoreEvaluation(Instruction *) const; ExprResult performSymbolicCallEvaluation(Instruction *) const; @@ -900,6 +912,29 @@ // Debug counter info. 
When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. int64_t StartingVNCounter = 0; + + // The following functions are used in load coercion: + // Collect the load instructions and add them to the LoadCoercion map. This is + // the first phase of load coercion. + const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, + Instruction *, + MemoryAccess *) const; + // Iterate the LoadCoercion map and replace the load instructions with a + // sequence of instructions which extract the value of the load from the + // depending instruction. This is the second phase of load coercion. + bool implementLoadCoercion(); + // Try to add the load along with the depending instruction(s) in the + // LoadCoercion map. + void tryAddLoadDepInsnIntoLoadCoercionMap(LoadInst *, Instruction *) const; + // Run value numbering for the instructions that are generated during load + // coercion. + void runValueNumberingForLoadCoercionInsns(Instruction *); + // Update MemorySSA with the load instructions that are emitted during load + // coercion. + void updateMemorySSA(Instruction *, Instruction *); + // Extract the value that will replace the load from the depending + // instruction. + Value *getExtractedValue(LoadInst *, Instruction *); }; } // end anonymous namespace @@ -1440,13 +1475,33 @@ return createStoreExpression(SI, StoreAccess); } +void NewGVN::tryAddLoadDepInsnIntoLoadCoercionMap( + LoadInst *LI, Instruction *CurrentDepI) const { + // Add the load and the corresponding depending instruction in the + // LoadCoercion map. + const_cast<NewGVN *>(this)->LoadCoercion[LI].insert(CurrentDepI); +} + // See if we can extract the value of a loaded pointer from a load, a store, or -// a memory instruction. +// a memory instruction. Load coercion has two phases. In the first phase (the +// code below), we collect the load instructions and we add them to the +// LoadCoercion map. 
If the loaded value is a constant, then we just create a +// constant expression and we do not add the load to the LoadCoercion map. In +// the second phase (implementLoadCoercion()), we iterate the LoadCoercion map +// and we try to replace the load with the value that we extract from the +// depending instruction. const Expression * NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst, MemoryAccess *DefiningAccess) const { assert((!LI || LI->isSimple()) && "Not a simple load"); + + // Do not apply load coercion for load instructions that are generated during + // load coercion. + auto It = llvm::find(NewLoadsInLoadCoercion, LI); + if (It != NewLoadsInLoadCoercion.end()) + return nullptr; + if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) { // Can't forward from non-atomic to atomic without violating memory model. // Also don't need to coerce if they are the same type, we will just @@ -1464,6 +1519,13 @@ << " to constant " << *Res << "\n"); return createConstantExpression(Res); } + } else if (EnableLoadCoercion) { + tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepInst); + // Load coercion occurs before the elimination phase. The load + // instructions that will be eliminated with load coercion are not added + // in any congruence class. Thus, we do not create any load expression + // for them. + return nullptr; + } } } else if (auto *DepLI = dyn_cast<LoadInst>(DepInst)) { @@ -1517,6 +1579,19 @@ return nullptr; } +const Expression * +NewGVN::createLoadExpAndUpdateMemUses(LoadInst *LI, Value *LoadAddressLeader, + MemoryAccess *OriginalAccess, + MemoryAccess *DefiningAccess) const { + const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI, + DefiningAccess); + // If our MemoryLeader is not our defining access, add a use to the + // MemoryLeader, so that we get reprocessed when it changes. 
+ if (LE->getMemoryLeader() != DefiningAccess) + addMemoryUsers(LE->getMemoryLeader(), OriginalAccess); + return LE; +} + const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { auto *LI = cast<LoadInst>(I); @@ -1533,6 +1608,7 @@ MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(OriginalAccess); + // Check if we can apply load coercion. if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) { Instruction *DefiningInst = MD->getMemoryInst(); @@ -1550,13 +1626,9 @@ } } - const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI, - DefiningAccess); - // If our MemoryLeader is not our defining access, add a use to the - // MemoryLeader, so that we get reprocessed when it changes. - if (LE->getMemoryLeader() != DefiningAccess) - addMemoryUsers(LE->getMemoryLeader(), OriginalAccess); - return LE; + // If we cannot apply load coercion, then we create a load expression. + return createLoadExpAndUpdateMemUses(LI, LoadAddressLeader, OriginalAccess, + DefiningAccess); } NewGVN::ExprResult @@ -2983,6 +3055,8 @@ MemoryToUsers.clear(); RevisitOnReachabilityChange.clear(); IntrinsicInstPred.clear(); + LoadCoercion.clear(); + NewLoadsInLoadCoercion.clear(); } // Assign local DFS number mapping to instructions, and leave space for Value @@ -3487,6 +3561,15 @@ verifyIterationSettled(F); verifyStoreExpressions(); + // During load coercion, we replace the load instructions with a new sequence + // of instructions. Next, we run value numbering which adds the new + // instructions in the right congruence classes. In this way, any redundant + // instructions will be optimized out in the elimination phase. + if (EnableLoadCoercion && implementLoadCoercion()) + // Update the newly generated instructions with the correct DFS numbers. + // TODO: Update DFS numbers faster. + ICount = updateDFSNumbers(1); + Changed |= eliminateInstructions(F); // Delete all instructions marked for deletion. 
@@ -3822,6 +3905,135 @@ return nullptr; } +// Update MemorySSA with the newly emitted load instruction. +void NewGVN::updateMemorySSA(Instruction *LoadToOptimize, + Instruction *NewLoad) { + MemorySSAUpdater MemSSAUpdater(MSSA); + MemoryAccess *DefiningAccess = MSSA->getLiveOnEntryDef(); + MemoryAccess *NewAccess = MemSSAUpdater.createMemoryAccessInBB( + NewLoad, DefiningAccess, NewLoad->getParent(), + MemorySSA::BeforeTerminator); + if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess)) + MemSSAUpdater.insertDef(NewDef, /*RenameUses=*/true); + else + MemSSAUpdater.insertUse(cast<MemoryUse>(NewAccess), + /*RenameUses=*/true); + + // Update the metadata of the new load. + AAMDNodes Tags = LoadToOptimize->getAAMetadata(); + if (Tags) + NewLoad->setAAMetadata(Tags); + + if (auto *MD = LoadToOptimize->getMetadata(LLVMContext::MD_invariant_load)) + NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); + if (auto *InvGroupMD = + LoadToOptimize->getMetadata(LLVMContext::MD_invariant_group)) + NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); + if (auto *RangeMD = LoadToOptimize->getMetadata(LLVMContext::MD_range)) + NewLoad->setMetadata(LLVMContext::MD_range, RangeMD); +} + +// Run value numbering for the instructions that are generated during load +// coercion. In this way, any redundant instructions will be removed in the +// elimination phase. +void NewGVN::runValueNumberingForLoadCoercionInsns(Instruction *I) { + TOPClass->insert(I); + ValueToClass[I] = TOPClass; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + NewLoadsInLoadCoercion.push_back(LI); + valueNumberInstruction(I); + updateProcessedCount(I); +} + +// Extract the correct value from the depending instruction. 
+Value *NewGVN::getExtractedValue(LoadInst *LI, Instruction *DepI) { + + Type *LoadTy = LI->getType(); + Value *NewValue = nullptr; + Instruction *InsertPtr = nullptr; + if (auto *Store = dyn_cast<StoreInst>(DepI)) { + int Offset = analyzeLoadFromClobberingStore(LoadTy, LI->getPointerOperand(), + Store, DL); + InsertPtr = Store->getNextNode(); + // Emit the instructions that extract the correct value from store. + NewValue = getStoreValueForLoad(Store->getValueOperand(), Offset, LoadTy, + InsertPtr, DL); + } else if (LoadInst *Load = dyn_cast<LoadInst>(DepI)) { + int Offset = analyzeLoadFromClobberingLoad(LoadTy, LI->getPointerOperand(), + Load, DL); + InsertPtr = Load->getNextNode(); + // Emit the instructions that extract the correct value from load. + NewValue = getLoadValueForLoad(Load, Offset, LoadTy, InsertPtr, DL); + } + + if (!isa<Constant>(NewValue) && !isa<Argument>(NewValue)) + for (Instruction *CurInsn = DepI->getNextNode(); CurInsn != InsertPtr; + CurInsn = CurInsn->getNextNode()) { + if (LoadInst *NewLI = dyn_cast<LoadInst>(CurInsn)) + updateMemorySSA(LI, NewLI); + runValueNumberingForLoadCoercionInsns(CurInsn); + } + + return NewValue; +} + +// Iterate over the load instructions of the LoadCoercion map and replace +// them with the right sequence of instructions. +bool NewGVN::implementLoadCoercion() { + bool AnythingReplaced = false; + for (const auto &P : LoadCoercion) { + LoadInst *LI = cast<LoadInst>(P.first); + SmallPtrSet<Instruction *, 2> DependingInsns = P.second; + Value *NewValue = nullptr; + Instruction *FirstDepI = *DependingInsns.begin(); + + if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI)) + // Extract the correct value from the depending instruction. + NewValue = getExtractedValue(LI, FirstDepI); + + // If we could not eliminate the load, then we need to create a load + // expression for the load and run value numbering in order to add it in the + // correct congruence class. 
+ if (!NewValue) { + Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand()); + MemoryAccess *OriginalAccess = getMemoryAccess(LI); + MemoryAccess *DefiningAccess = + MSSAWalker->getClobberingMemoryAccess(OriginalAccess); + createLoadExpAndUpdateMemUses(LI, LoadAddressLeader, OriginalAccess, + DefiningAccess); + valueNumberInstruction(LI); + updateProcessedCount(LI); + continue; + } + + // Collect the uses of the load. + SmallVector<Instruction *, 8> LIUses; + for (Use &U : LI->uses()) + LIUses.push_back(cast<Instruction>(U.getUser())); + // Remove the load and update its uses. + InstructionsToErase.insert(LI); + LI->replaceAllUsesWith(NewValue); + // Run value numbering for the uses of the load after updating them with the + // new value. In this way, we might be able to eliminate them. + for (Instruction *User : LIUses) { + valueNumberInstruction(User); + updateProcessedCount(User); + } + // Update the name of the phi node if we generated one. + if (isa<PHINode>(NewValue)) + NewValue->takeName(LI); + // Update debug information. + if (Instruction *I = dyn_cast<Instruction>(NewValue)) + I->setDebugLoc(LI->getDebugLoc()); + LLVM_DEBUG(dbgs() << "Load coercion: The load " << *LI + << " was eliminated and its uses were replaced by " + << *NewValue << "\n"); + AnythingReplaced = true; + } + + return AnythingReplaced; +} + bool NewGVN::eliminateInstructions(Function &F) { // This is a non-standard eliminator. 
The normal way to eliminate is // to walk the dominator tree in order, keeping track of available diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll @@ -448,9 +448,9 @@ ; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 ; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: store i32 [[V:%.*]], i32* [[P1]], align 4 -; NEWGVN-NEXT: [[V4:%.*]] = load i8, i8* [[P2]], align 1 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8 ; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V3]] -; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[V4]], [[V5]] +; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP1]], [[V5]] ; NEWGVN-NEXT: ret i8 [[V6]] ; %V1 = load i32, i32* %P1 diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll @@ -11,9 +11,9 @@ ; ; NEWGVN-LABEL: @test1( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V1]] to float ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P1]], align 4 -; NEWGVN-NEXT: ret float [[V2]] +; NEWGVN-NEXT: ret float [[TMP1]] ; store i32 %V1, i32* %P %P1 = bitcast i32* %P to float* @@ -32,9 +32,11 @@ ; ; NEWGVN-LABEL: @test2( ; NEWGVN-NEXT: store i64* [[V1:%.*]], i64** [[P1:%.*]], align 8 +; NEWGVN-NEXT: [[TMP1:%.*]] = ptrtoint i64* [[V1]] to i64 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; NEWGVN-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64** [[P1]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4 -; 
NEWGVN-NEXT: ret float [[V2]] +; NEWGVN-NEXT: ret float [[TMP3]] ; store i64* %V1, i64** %P1 %P2 = bitcast i64** %P1 to float* @@ -51,9 +53,9 @@ ; ; NEWGVN-LABEL: @test3( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: ret i8 [[V2]] +; NEWGVN-NEXT: ret i8 [[TMP1]] ; store i32 %V1, i32* %P1 %P2 = bitcast i32* %P1 to i8* @@ -71,9 +73,10 @@ ; ; NEWGVN-LABEL: @test4( ; NEWGVN-NEXT: store i64 [[V1:%.*]], i64* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[V1]] to i32 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P1]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4 -; NEWGVN-NEXT: ret float [[V2]] +; NEWGVN-NEXT: ret float [[TMP2]] ; store i64 %V1, i64* %P1 %P2 = bitcast i64* %P1 to float* @@ -107,9 +110,9 @@ ; ; NEWGVN-LABEL: @test6( ; NEWGVN-NEXT: store i64 [[V1:%.*]], i64* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = inttoptr i64 [[V1]] to i8* ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P1]] to i8** -; NEWGVN-NEXT: [[V2:%.*]] = load i8*, i8** [[P2]], align 8 -; NEWGVN-NEXT: ret i8* [[V2]] +; NEWGVN-NEXT: ret i8* [[TMP1]] ; store i64 %V1, i64* %P1 %P2 = bitcast i64* %P1 to i8** @@ -127,9 +130,10 @@ ; ; NEWGVN-LABEL: @test7( ; NEWGVN-NEXT: store double [[V1:%.*]], double* [[P1:%.*]], align 8 +; NEWGVN-NEXT: [[TMP1:%.*]] = bitcast double [[V1]] to i64 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast double* [[P1]] to i32* -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 -; NEWGVN-NEXT: ret i32 [[V2]] +; NEWGVN-NEXT: ret i32 [[TMP2]] ; store double %V1, double* %P1 %P2 = bitcast double* %P1 to i32* @@ -148,10 +152,11 @@ ; ; NEWGVN-LABEL: @test8( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: 
[[TMP1:%.*]] = lshr i32 [[V1]], 16 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* ; NEWGVN-NEXT: [[P3:%.*]] = getelementptr i8, i8* [[P2]], i32 2 -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P3]], align 1 -; NEWGVN-NEXT: ret i8 [[V2]] +; NEWGVN-NEXT: ret i8 [[TMP2]] ; store i32 %V1, i32* %P1 %P2 = bitcast i32* %P1 to i8* @@ -179,14 +184,12 @@ ; NEWGVN-LABEL: @test9( ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: store i64 [[V:%.*]], i64* [[P1:%.*]], align 4 -; NEWGVN-NEXT: [[P3:%.*]] = bitcast i64* [[P1]] to double* +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast i64 [[V]] to double ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: -; NEWGVN-NEXT: [[B:%.*]] = load double, double* [[P3]], align 8 -; NEWGVN-NEXT: ret double [[B]] +; NEWGVN-NEXT: ret double [[TMP0]] ; NEWGVN: F: -; NEWGVN-NEXT: [[C:%.*]] = load double, double* [[P3]], align 8 -; NEWGVN-NEXT: ret double [[C]] +; NEWGVN-NEXT: ret double [[TMP0]] ; Entry: %A = load i64 , i64* %P1 @@ -216,12 +219,12 @@ ; ; NEWGVN-LABEL: @test10( ; NEWGVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V0]] to i8 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i32 [[V0]] to float ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V1:%.*]] = load float, float* [[P1]], align 4 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, i8 [[V2]], 0 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[V1]], 1 +; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP1]], 0 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[TMP2]], 1 ; NEWGVN-NEXT: ret <{ i8, float }> [[I2]] ; store i32 %V0, i32* %P @@ -239,35 +242,20 @@ ; / \ ; T F ; -; OLDGVN-LABEL: @test11( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: store i32 [[V0:%.*]], 
i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: [[TMP0:%.*]] = trunc i32 [[V0]] to i8 -; OLDGVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V0]] to float -; OLDGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; OLDGVN: T: -; OLDGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; OLDGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, float [[TMP1]], 1 -; OLDGVN-NEXT: ret <{ i8, float }> [[I1]] -; OLDGVN: F: -; OLDGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; OLDGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP0]], 0 -; OLDGVN-NEXT: ret <{ i8, float }> [[I2]] -; -; NEWGVN-LABEL: @test11( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; NEWGVN: T: -; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V1:%.*]] = load float, float* [[P1]], align 4 -; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, float [[V1]], 1 -; NEWGVN-NEXT: ret <{ i8, float }> [[I1]] -; NEWGVN: F: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> poison, i8 [[V2]], 0 -; NEWGVN-NEXT: ret <{ i8, float }> [[I2]] +; GVN-LABEL: @test11( +; GVN-NEXT: Entry: +; GVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 +; GVN-NEXT: [[TMP0:%.*]] = trunc i32 [[V0]] to i8 +; GVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V0]] to float +; GVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; GVN: T: +; GVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* +; GVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, float [[TMP1]], 1 +; GVN-NEXT: ret <{ i8, float }> [[I1]] +; GVN: F: +; GVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* +; GVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP0]], 0 +; GVN-NEXT: ret <{ i8, float }> [[I2]] ; Entry: store i32 %V0, i32* %P @@ -308,16 +296,13 @@ ; NEWGVN-LABEL: 
@test12( ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast i32 [[V0]] to float ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: -; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V1:%.*]] = load float, float* [[P1]], align 4 -; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ float, float }> poison, float [[V1]], 1 +; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ float, float }> poison, float [[TMP0]], 1 ; NEWGVN-NEXT: ret <{ float, float }> [[I1]] ; NEWGVN: F: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ float, float }> poison, float [[V2]], 0 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ float, float }> poison, float [[TMP0]], 0 ; NEWGVN-NEXT: ret <{ float, float }> [[I2]] ; Entry: @@ -348,11 +333,10 @@ ; ; NEWGVN-LABEL: @test13( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i32* [[P1]] to i64* -; NEWGVN-NEXT: [[V4:%.*]] = trunc i32 [[V1]] to i8 -; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V4]] +; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]] ; NEWGVN-NEXT: ret i8 [[V5]] ; store i32 %V1, i32* %P1 @@ -375,9 +359,8 @@ ; ; NEWGVN-LABEL: @test14( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V2]] +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 +; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]] ; NEWGVN-NEXT: ret i8 [[V5]] ; store i32 %V1, i32* %P1 diff --git a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll 
b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll @@ -2571,14 +2571,15 @@ ; NEWGVN-NEXT: [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2 ; NEWGVN-NEXT: [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3 ; NEWGVN-NEXT: store <4 x i64> [[I4]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64 ; NEWGVN-NEXT: [[INDEX_INC]] = add i64 [[INDEX]], 1 ; NEWGVN-NEXT: [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]] ; NEWGVN-NEXT: br i1 [[COND]], label [[LOOP]], label [[BB:%.*]] ; NEWGVN: BB: -; NEWGVN-NEXT: [[V4:%.*]] = load i64, i64* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[V5:%.*]] = add i64 [[V4]], [[TC]] +; NEWGVN-NEXT: [[V5:%.*]] = add i64 [[TMP1]], [[TC]] ; NEWGVN-NEXT: ret i64 [[V5]] ; Entry: @@ -2661,21 +2662,23 @@ ; NEWGVN-NEXT: [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2 ; NEWGVN-NEXT: [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3 ; NEWGVN-NEXT: store <4 x i64> [[I4]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64> +; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i256 [[TMP0]] to i64 ; NEWGVN-NEXT: [[INDEX_INC]] = add i64 [[INDEX]], 1 ; NEWGVN-NEXT: [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]] ; NEWGVN-NEXT: br i1 [[COND]], label [[LOOP]], label [[BB1:%.*]] ; NEWGVN: BB1: ; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[BB2:%.*]], label [[BB3:%.*]] ; NEWGVN: BB2: -; NEWGVN-NEXT: [[V4:%.*]] = load i64, i64* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: BB3: ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] 
= phi i64 [ [[V4]], [[BB2]] ], [ 100, [[BB3]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP3]], [[BB2]] ], [ 100, [[BB3]] ] ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P]] to <2 x i64>* -; NEWGVN-NEXT: [[V5:%.*]] = load <2 x i64>, <2 x i64>* [[P2]], align 16 -; NEWGVN-NEXT: [[V6:%.*]] = extractelement <2 x i64> [[V5]], i64 1 +; NEWGVN-NEXT: [[V6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1 ; NEWGVN-NEXT: [[V7:%.*]] = add i64 [[PHI]], [[V6]] ; NEWGVN-NEXT: ret i64 [[V7]] ; @@ -2779,21 +2782,23 @@ ; NEWGVN-NEXT: [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2 ; NEWGVN-NEXT: [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3 ; NEWGVN-NEXT: store <4 x i64> [[I4]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64> +; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i256 [[TMP0]] to i64 ; NEWGVN-NEXT: [[INDEX1_INC]] = add i64 [[INDEX1]], 1 ; NEWGVN-NEXT: [[COND1:%.*]] = icmp ne i64 [[INDEX1_INC]], [[TC1:%.*]] ; NEWGVN-NEXT: br i1 [[COND1]], label [[LOOP1]], label [[BB1:%.*]] ; NEWGVN: BB1: ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB3:%.*]] ; NEWGVN: BB2: -; NEWGVN-NEXT: [[V4:%.*]] = load i64, i64* [[P]], align 4 ; NEWGVN-NEXT: br label [[LOOP2:%.*]] ; NEWGVN: BB3: ; NEWGVN-NEXT: br label [[LOOP2]] ; NEWGVN: Loop2: -; NEWGVN-NEXT: [[INDEX2:%.*]] = phi i64 [ [[V4]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ] +; NEWGVN-NEXT: [[INDEX2:%.*]] = phi i64 [ [[TMP3]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ] ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P]] to <2 x i64>* -; NEWGVN-NEXT: [[V5:%.*]] = load <2 x i64>, <2 x i64>* [[P2]], align 16 -; NEWGVN-NEXT: [[V6:%.*]] = extractelement <2 x i64> [[V5]], i64 1 +; NEWGVN-NEXT: [[V6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1 ; NEWGVN-NEXT: [[V7:%.*]] = add i64 [[V6]], [[INDEX2]] ; NEWGVN-NEXT: 
[[INDEX2_INC]] = add i64 [[INDEX2]], 1 ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i64 [[INDEX2_INC]], [[TC2:%.*]] @@ -2908,24 +2913,25 @@ ; NEWGVN-NEXT: [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2 ; NEWGVN-NEXT: [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3 ; NEWGVN-NEXT: store <4 x i64> [[I4]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i256 [[TMP0]] to i128 +; NEWGVN-NEXT: [[TMP3:%.*]] = bitcast i128 [[TMP2]] to <2 x i64> ; NEWGVN-NEXT: [[INDEX1_INC]] = add i64 [[INDEX1]], 1 ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i64 [[INDEX1_INC]], [[TC1:%.*]] ; NEWGVN-NEXT: br i1 [[COND2]], label [[LOOP1]], label [[BB1:%.*]] ; NEWGVN: BB1: ; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[BB2:%.*]], label [[BB3:%.*]] ; NEWGVN: BB2: -; NEWGVN-NEXT: [[V4:%.*]] = load i64, i64* [[P]], align 4 ; NEWGVN-NEXT: br label [[LOOP2:%.*]] ; NEWGVN: BB3: ; NEWGVN-NEXT: br label [[LOOP2]] ; NEWGVN: Loop2: -; NEWGVN-NEXT: [[INDEX2:%.*]] = phi i64 [ [[V4]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ] +; NEWGVN-NEXT: [[INDEX2:%.*]] = phi i64 [ [[TMP1]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ] ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P]] to <2 x i64>* -; NEWGVN-NEXT: [[V5:%.*]] = load <2 x i64>, <2 x i64>* [[P2]], align 16 -; NEWGVN-NEXT: [[V6:%.*]] = extractelement <2 x i64> [[V5]], i64 1 -; NEWGVN-NEXT: [[V7:%.*]] = load i64, i64* [[P]], align 4 +; NEWGVN-NEXT: [[V6:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1 ; NEWGVN-NEXT: [[V8:%.*]] = add i64 [[V6]], [[INDEX2]] -; NEWGVN-NEXT: [[V9:%.*]] = add i64 [[V8]], [[V7]] +; NEWGVN-NEXT: [[V9:%.*]] = add i64 [[V8]], [[TMP1]] ; NEWGVN-NEXT: [[INDEX2_INC]] = add i64 [[INDEX2]], 1 ; NEWGVN-NEXT: [[COND3:%.*]] = icmp ne i64 [[INDEX2_INC]], [[TC2:%.*]] ; NEWGVN-NEXT: br i1 [[COND3]], label [[LOOP2]], label [[EXIT:%.*]] @@ -3274,9 +3280,11 @@ ; 
NEWGVN-NEXT: [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2 ; NEWGVN-NEXT: [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3 ; NEWGVN-NEXT: store <4 x i64> [[I4]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64> ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P]] to <2 x i64>* -; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i64>, <2 x i64>* [[P2]], align 16 -; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i64> [[V2]], i64 1 +; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1 ; NEWGVN-NEXT: [[V4:%.*]] = add i64 [[V1]], [[V3]] ; NEWGVN-NEXT: [[INDEX_INC]] = add i64 [[INDEX]], 1 ; NEWGVN-NEXT: [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]] @@ -3650,14 +3658,15 @@ ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i64>, <4 x i64>* [[P1]], align 32 ; NEWGVN-NEXT: [[I1:%.*]] = insertelement <4 x i64> [[V1]], i64 [[INDEX]], i32 1 ; NEWGVN-NEXT: store <4 x i64> [[I1]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I1]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64 ; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; NEWGVN: BB1: ; NEWGVN-NEXT: br label [[BB3:%.*]] ; NEWGVN: BB2: -; NEWGVN-NEXT: [[V2:%.*]] = load i64, i64* [[P]], align 4 ; NEWGVN-NEXT: br label [[BB3]] ; NEWGVN: BB3: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i64 [ 100, [[BB1]] ], [ [[V2]], [[BB2]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i64 [ 100, [[BB1]] ], [ [[TMP1]], [[BB2]] ] ; NEWGVN-NEXT: [[V3:%.*]] = add i64 [[PHI]], [[INDEX]] ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: Loop.Latch: @@ -3911,15 +3920,17 @@ ; NEWGVN: T: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>* ; NEWGVN-NEXT: store <2 x i32> [[V1:%.*]], <2 x i32>* [[P1]], align 4 -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast 
<2 x i32> [[V1]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to <4 x i32>* ; NEWGVN-NEXT: store <4 x i32> [[V2:%.*]], <4 x i32>* [[P2]], align 4 -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V2]] to i128 +; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V3]], [[T]] ], [ [[V4]], [[F]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ [[TMP3]], [[F]] ] ; NEWGVN-NEXT: [[V5:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: ret i32 [[V5]] ; diff --git a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll b/llvm/test/Transforms/NewGVN/pr14166-xfail.ll --- a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll +++ b/llvm/test/Transforms/NewGVN/pr14166-xfail.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: opt -disable-basic-aa -passes=newgvn -S < %s | FileCheck %s ; NewGVN fails this due to missing load coercion target datalayout = "e-p:32:32:32"