diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -157,6 +157,10 @@ static cl::opt EnablePhiOfOps("enable-phi-of-ops", cl::init(true), cl::Hidden); +// Enables load coercion for non-constant values. +static cl::opt EnableLoadCoercion("enable-load-coercion", cl::init(true), + cl::Hidden); + //===----------------------------------------------------------------------===// // GVN Pass //===----------------------------------------------------------------------===// @@ -656,6 +660,9 @@ // Deletion info. SmallPtrSet InstructionsToErase; + // Maps the loads to their depending instructions. + std::map> LoadCoercion; + public: NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, @@ -900,6 +907,19 @@ // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. int64_t StartingVNCounter = 0; + + // The following functions are used in load coercion: + // Try to add the load along with the depending instrunction(s) in + // LoadCoercion map. + void tryAddLoadDepInsnIntoLoadCoercionMap(LoadInst *, Instruction *) const; + // Eliminates the loads of the LoadCoercion map. + bool implementLoadCoercion(); + // Run value numbering for the instructions that are generated during load + // coercion. + void runValueNumberingForLoadCoercionInsns(Instruction *); + // Extract from the depending instruction the value that will replace the + // load. + Value *getExtractedValue(LoadInst *, Instruction *); }; } // end anonymous namespace @@ -1434,6 +1454,13 @@ return createStoreExpression(SI, StoreAccess); } +void NewGVN::tryAddLoadDepInsnIntoLoadCoercionMap( + LoadInst *LI, Instruction *CurrentDepI) const { + // Add the load and the corresponding depending instruction in LoadCoercion + // map. + const_cast(this)->LoadCoercion[LI].insert(CurrentDepI); +} + // See if we can extract the value of a loaded pointer from a load, a store, or // a memory instruction. const Expression * @@ -1456,6 +1483,12 @@ << " to constant " << *C << "\n"); return createConstantExpression( getConstantStoreValueForLoad(C, Offset, LoadType, DL)); + } else if (EnableLoadCoercion) { + tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepInst); + // Load coercion occurs before the elimination phase. The loads that + // will be eliminated with load coercion are not added in any congruence + // class. Thus, we do not create any load expression for them. + return nullptr; } } } else if (auto *DepLI = dyn_cast(DepInst)) { @@ -2977,6 +3010,7 @@ MemoryToUsers.clear(); RevisitOnReachabilityChange.clear(); IntrinsicInstPred.clear(); + LoadCoercion.clear(); } // Assign local DFS number mapping to instructions, and leave space for Value @@ -3481,6 +3515,15 @@ verifyIterationSettled(F); verifyStoreExpressions(); + // During load coercion, we replace the load instructions with a new sequence + // of instructions. Next, we run value numbering which adds the new + // instructions in the right congruence classes. In this way, any redundant + // instructions will be optimized out in the elimination phase. + if (EnableLoadCoercion && implementLoadCoercion()) + // Update the newly generated instructions with the correct DFS numbers. + // TODO: Update DFS numbers faster. + ICount = updateDFSNumbers(ICount); + Changed |= eliminateInstructions(F); // Delete all instructions marked for deletion. @@ -3816,6 +3859,79 @@ return nullptr; } +// Run value numbering for the instructions that are generated during load +// coercion. In this way, any redundant instructions will be removed in the +// elimination phase. +void NewGVN::runValueNumberingForLoadCoercionInsns(Instruction *I) { + TOPClass->insert(I); + ValueToClass[I] = TOPClass; + valueNumberInstruction(I); + updateProcessedCount(I); +} + +// Extract the correct value from the depending instruction. +Value *NewGVN::getExtractedValue(LoadInst *LI, Instruction *DepI) { + + Type *LoadTy = LI->getType(); + Value *NewValue = nullptr; + Instruction *InsertPtr = nullptr; + if (auto *Store = dyn_cast(DepI)) { + int Offset = analyzeLoadFromClobberingStore(LoadTy, LI->getPointerOperand(), + Store, DL); + InsertPtr = Store->getNextNode(); + // Emit the instructions that extract the correct value from store. + NewValue = getStoreValueForLoad(Store->getValueOperand(), Offset, LoadTy, + InsertPtr, DL); + } else if (LoadInst *Load = dyn_cast(DepI)) { + int Offset = analyzeLoadFromClobberingLoad(LoadTy, LI->getPointerOperand(), + Load, DL); + InsertPtr = Load->getNextNode(); + // Emit the instructions that extract the correct value from load. + NewValue = getLoadValueForLoad(Load, Offset, LoadTy, InsertPtr, DL); + } + + if (!isa(NewValue) && !isa(NewValue)) + for (Instruction *CurInsn = DepI->getNextNode(); CurInsn != InsertPtr; + CurInsn = CurInsn->getNextNode()) + runValueNumberingForLoadCoercionInsns(CurInsn); + + return NewValue; +} + +// Iterate over the load instructions of LoadCoercion map and it replaces +// them with the right sequence of instructions. +bool NewGVN::implementLoadCoercion() { + bool AnythingReplaced = false; + for (const auto &P : LoadCoercion) { + LoadInst *LI = cast(P.first); + SmallPtrSet DependingInsns = P.second; + Value *NewValue = nullptr; + Instruction *FirstDepI = *DependingInsns.begin(); + + if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI)) + // Extract the correct value from the depending instruction. + NewValue = getExtractedValue(LI, FirstDepI); + + // Load coercion was rejected. + if (!NewValue) + continue; + + // Remove the load and update all of its uses. + InstructionsToErase.insert(LI); + LI->replaceAllUsesWith(NewValue); + if (isa(NewValue)) + NewValue->takeName(LI); + if (Instruction *I = dyn_cast(NewValue)) + I->setDebugLoc(LI->getDebugLoc()); + LLVM_DEBUG(dbgs() << "Load coersion: The load " << *LI + << " was eliminated and its uses were replaced by " + << *NewValue << "\n"); + AnythingReplaced = true; + } + + return AnythingReplaced; +} + bool NewGVN::eliminateInstructions(Function &F) { // This is a non-standard eliminator. The normal way to eliminate is // to walk the dominator tree in order, keeping track of available diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll @@ -448,9 +448,9 @@ ; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 ; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: store i32 [[V:%.*]], i32* [[P1]], align 4 -; NEWGVN-NEXT: [[V4:%.*]] = load i8, i8* [[P2]], align 1 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8 ; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V3]] -; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[V4]], [[V5]] +; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP1]], [[V5]] ; NEWGVN-NEXT: ret i8 [[V6]] ; %V1 = load i32, i32* %P1 diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll @@ -11,9 +11,9 @@ ; ; NEWGVN-LABEL: @test1( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V1]] to float ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P1]], align 4 -; NEWGVN-NEXT: ret float [[V2]] +; NEWGVN-NEXT: ret float [[TMP1]] ; store i32 %V1, i32* %P %P1 = bitcast i32* %P to float* @@ -32,9 +32,11 @@ ; ; NEWGVN-LABEL: @test2( ; NEWGVN-NEXT: store i64* [[V1:%.*]], i64** [[P1:%.*]], align 8 +; NEWGVN-NEXT: [[TMP1:%.*]] = ptrtoint i64* [[V1]] to i64 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; NEWGVN-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64** [[P1]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4 -; NEWGVN-NEXT: ret float [[V2]] +; NEWGVN-NEXT: ret float [[TMP3]] ; store i64* %V1, i64** %P1 %P2 = bitcast i64** %P1 to float* @@ -51,9 +53,9 @@ ; ; NEWGVN-LABEL: @test3( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: ret i8 [[V2]] +; NEWGVN-NEXT: ret i8 [[TMP1]] ; store i32 %V1, i32* %P1 %P2 = bitcast i32* %P1 to i8* @@ -71,9 +73,10 @@ ; ; NEWGVN-LABEL: @test4( ; NEWGVN-NEXT: store i64 [[V1:%.*]], i64* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[V1]] to i32 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P1]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4 -; NEWGVN-NEXT: ret float [[V2]] +; NEWGVN-NEXT: ret float [[TMP2]] ; store i64 %V1, i64* %P1 %P2 = bitcast i64* %P1 to float* @@ -107,9 +110,9 @@ ; ; NEWGVN-LABEL: @test6( ; NEWGVN-NEXT: store i64 [[V1:%.*]], i64* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = inttoptr i64 [[V1]] to i8* ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i64* [[P1]] to i8** -; NEWGVN-NEXT: [[V2:%.*]] = load i8*, i8** [[P2]], align 8 -; NEWGVN-NEXT: ret i8* [[V2]] +; NEWGVN-NEXT: ret i8* [[TMP1]] ; store i64 %V1, i64* %P1 %P2 = bitcast i64* %P1 to i8** @@ -127,9 +130,10 @@ ; ; NEWGVN-LABEL: @test7( ; NEWGVN-NEXT: store double [[V1:%.*]], double* [[P1:%.*]], align 8 +; NEWGVN-NEXT: [[TMP1:%.*]] = bitcast double [[V1]] to i64 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast double* [[P1]] to i32* -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 -; NEWGVN-NEXT: ret i32 [[V2]] +; NEWGVN-NEXT: ret i32 [[TMP2]] ; store double %V1, double* %P1 %P2 = bitcast double* %P1 to i32* @@ -148,10 +152,11 @@ ; ; NEWGVN-LABEL: @test8( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = lshr i32 [[V1]], 16 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* ; NEWGVN-NEXT: [[P3:%.*]] = getelementptr i8, i8* [[P2]], i32 2 -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P3]], align 1 -; NEWGVN-NEXT: ret i8 [[V2]] +; NEWGVN-NEXT: ret i8 [[TMP2]] ; store i32 %V1, i32* %P1 %P2 = bitcast i32* %P1 to i8* @@ -179,14 +184,12 @@ ; NEWGVN-LABEL: @test9( ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: store i64 [[V:%.*]], i64* [[P1:%.*]], align 4 -; NEWGVN-NEXT: [[P3:%.*]] = bitcast i64* [[P1]] to double* +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast i64 [[V]] to double ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: -; NEWGVN-NEXT: [[B:%.*]] = load double, double* [[P3]], align 8 -; NEWGVN-NEXT: ret double [[B]] +; NEWGVN-NEXT: ret double [[TMP0]] ; NEWGVN: F: -; NEWGVN-NEXT: [[C:%.*]] = load double, double* [[P3]], align 8 -; NEWGVN-NEXT: ret double [[C]] +; NEWGVN-NEXT: ret double [[TMP0]] ; Entry: %A = load i64 , i64* %P1 @@ -216,12 +219,12 @@ ; ; NEWGVN-LABEL: @test10( ; NEWGVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V0]] to i8 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i32 [[V0]] to float ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V1:%.*]] = load float, float* [[P1]], align 4 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, i8 [[V2]], 0 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[V1]], 1 +; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP1]], 0 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[TMP2]], 1 ; NEWGVN-NEXT: ret <{ i8, float }> [[I2]] ; store i32 %V0, i32* %P @@ -239,35 +242,20 @@ ; / \ ; T F ; -; OLDGVN-LABEL: @test11( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: [[TMP0:%.*]] = trunc i32 [[V0]] to i8 -; OLDGVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V0]] to float -; OLDGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; OLDGVN: T: -; OLDGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; OLDGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, float [[TMP1]], 1 -; OLDGVN-NEXT: ret <{ i8, float }> [[I1]] -; OLDGVN: F: -; OLDGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; OLDGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP0]], 0 -; OLDGVN-NEXT: ret <{ i8, float }> [[I2]] -; -; NEWGVN-LABEL: @test11( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; NEWGVN: T: -; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V1:%.*]] = load float, float* [[P1]], align 4 -; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, float [[V1]], 1 -; NEWGVN-NEXT: ret <{ i8, float }> [[I1]] -; NEWGVN: F: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> poison, i8 [[V2]], 0 -; NEWGVN-NEXT: ret <{ i8, float }> [[I2]] +; GVN-LABEL: @test11( +; GVN-NEXT: Entry: +; GVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 +; GVN-NEXT: [[TMP0:%.*]] = trunc i32 [[V0]] to i8 +; GVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V0]] to float +; GVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; GVN: T: +; GVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* +; GVN-NEXT: [[I1:%.*]] = insertvalue <{ i8, float }> poison, float [[TMP1]], 1 +; GVN-NEXT: ret <{ i8, float }> [[I1]] +; GVN: F: +; GVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* +; GVN-NEXT: [[I2:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP0]], 0 +; GVN-NEXT: ret <{ i8, float }> [[I2]] ; Entry: store i32 %V0, i32* %P @@ -308,16 +296,13 @@ ; NEWGVN-LABEL: @test12( ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: store i32 [[V0:%.*]], i32* [[P:%.*]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast i32 [[V0]] to float ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: -; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V1:%.*]] = load float, float* [[P1]], align 4 -; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ float, float }> poison, float [[V1]], 1 +; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ float, float }> poison, float [[TMP0]], 1 ; NEWGVN-NEXT: ret <{ float, float }> [[I1]] ; NEWGVN: F: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ float, float }> poison, float [[V2]], 0 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ float, float }> poison, float [[TMP0]], 0 ; NEWGVN-NEXT: ret <{ float, float }> [[I2]] ; Entry: @@ -348,11 +333,10 @@ ; ; NEWGVN-LABEL: @test13( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i32* [[P1]] to i64* -; NEWGVN-NEXT: [[V4:%.*]] = trunc i32 [[V1]] to i8 -; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V4]] +; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]] ; NEWGVN-NEXT: ret i8 [[V5]] ; store i32 %V1, i32* %P1 @@ -375,9 +359,8 @@ ; ; NEWGVN-LABEL: @test14( ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V2]] +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 +; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]] ; NEWGVN-NEXT: ret i8 [[V5]] ; store i32 %V1, i32* %P1 diff --git a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll b/llvm/test/Transforms/NewGVN/pr14166-xfail.ll --- a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll +++ b/llvm/test/Transforms/NewGVN/pr14166-xfail.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: opt -disable-basic-aa -passes=newgvn -S < %s | FileCheck %s ; NewGVN fails this due to missing load coercion target datalayout = "e-p:32:32:32"