diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1454,8 +1454,83 @@ return createStoreExpression(SI, StoreAccess); } +// A load can have one or more dependencies as the following examples show: +// +// Example 1: +// BB1: +// ... +// store i32 %V1, i32* %P +// ... +// %V2 = load i32, i32* %P +// ... +// +// Example 2: +// BB1: BB2: +// store i32 %V1, i32* %P %V2 = load i32, i32* %P +// br label %BB3 br label %BB3 +// \ / +// BB3: +// %V3 = load i32, i32* %P +// +// In the first example, the load (%V2) has only one dependency. In the second +// example, the load (%V3) has two dependencies. Therefore, we add the load +// along with its two dependencies in LoadCoercion map. However, this is not +// always the case as it is shown below: +// +// Example 3: +// BB1: +// %P1 = bitcast i32* %P to <4 x i32>* +// %V1 = load <4 x i32>, <4 x i32>* %P1 +// br i1 %cond, label %BB2, label %BB3 +// / \ +// BB2: BB3: +// %P2 = bitcast i32* %P to <2 x i32>* %V3 = load i32, i32* %P +// %V2 = load <2 x i32>, <2 x i32>* %P2 br label %BB4 +// br label %BB4 / +// \ / +// BB4: +// %V4 = load i32, i32* %P +// +// In the above example, the load (%V4) can be optimized out by any of the loads +// (%V1, %V2, %V3). But, loads %V2 and %V3 can also be optimized out by %V1. For +// this reason, we need to do an extra check before we add the load in the map. +// We check if the load is already in the map and if the existing depending +// instruction dominates the current depending instruction. If so, then we do +// not add the new depending instruction in LoadCoercion map. If the current +// depending instruction dominates the existing depending instruction, then we +// remove the existing depending instruction from LoadCoercion map and we add +// the current depending instruction. 
Therefore, in Example 3, the load +// (%V4) has only one dependency (%V1) and we add only this one in LoadCoercion +// map. void NewGVN::tryAddLoadDepInsnIntoLoadCoercionMap( LoadInst *LI, Instruction *CurrentDepI) const { + // Check if LI already exists in LoadCoercion map. + auto It = const_cast(this)->LoadCoercion.find(LI); + if (It != LoadCoercion.end()) { + auto &ExistingDepIs = It->second; + // Iterate over all the existing depending instructions of LI. + for (Instruction *ExistingDepI : + llvm::make_early_inc_range(ExistingDepIs)) { + + if (MSSAWalker->getClobberingMemoryAccess(getMemoryAccess(CurrentDepI)) == + MSSAWalker->getClobberingMemoryAccess( + getMemoryAccess(ExistingDepI)) && + isa(ExistingDepI) && isa(CurrentDepI)) { + // If the existing depending instruction dominates the current depending + // instruction, then we should not add the current depending instruction + // in LoadCoercion map (Example 3). + if (DT->dominates(ExistingDepI, CurrentDepI)) + return; + + // If the current depending instruction dominates the existing one, then + // we remove the existing depending instruction from the LoadCoercion + // map. Next, we add the current depending instruction in LoadCoercion + // map. + if (DT->dominates(CurrentDepI, ExistingDepI)) + ExistingDepIs.erase(ExistingDepI); + } + } + } // Add the load and the corresponding depending instruction in LoadCoercion // map. const_cast(this)->LoadCoercion[LI].insert(CurrentDepI); @@ -1498,13 +1573,19 @@ int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL); if (Offset >= 0) { // We can coerce a constant load into a load. 
- if (auto *C = dyn_cast(lookupOperandLeader(DepLI))) + if (auto *C = dyn_cast(lookupOperandLeader(DepLI))) { if (auto *PossibleConstant = getConstantLoadValueForLoad(C, Offset, LoadType, DL)) { LLVM_DEBUG(dbgs() << "Coercing load from load " << *LI << " to constant " << *PossibleConstant << "\n"); return createConstantExpression(PossibleConstant); } + } else if (EnableLoadCoercion) { + // Similarly, we do not create a load expression for the loads that are + // eliminated with load coercion. + tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepInst); + return nullptr; + } } } else if (auto *DepMI = dyn_cast(DepInst)) { int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL); @@ -1574,8 +1655,45 @@ DefiningInst, DefiningAccess)) return CoercionResult; } + } else if (EnableLoadCoercion) { + // Check if any of the live-in loads can be eliminated with load coercion. + for (const auto &U : DefiningAccess->uses()) + if (auto *MemUse = dyn_cast(U.getUser())) { + LoadInst *DependingLoad = dyn_cast(MemUse->getMemoryInst()); + + if (!DependingLoad || LI == DependingLoad) + continue; + + // The DependingLoad should be at least as wide in bits as the load that + // we should optimize. + if (DL.getTypeSizeInBits(DependingLoad->getType()).getFixedSize() < + DL.getTypeSizeInBits(LI->getType()).getFixedSize()) + continue; + + // If the depending load does not have any uses, then we should not do + // load coercion because the depending load will be eliminated. + if (DependingLoad->getNumUses() == 0) + continue; + + // If two load instructions have the same operands, then it is not + // load coercion. + bool DependingLoadDomintatesLI = DT->dominates(DependingLoad, LI); + if (DependingLoad->getPointerOperand() == LI->getPointerOperand() && + DependingLoadDomintatesLI) + continue; + + // The two loads should be executed in the right order. 
+ if (DependingLoadDomintatesLI) + performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(), + LI, DependingLoad, + getMemoryAccess(DependingLoad)); + } } + // We do not create a load expression for the loads of load coercion. + if (EnableLoadCoercion && LoadCoercion.count(LI)) + return nullptr; + const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI, DefiningAccess); // If our MemoryLeader is not our defining access, add a use to the diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll @@ -12,10 +12,9 @@ ; ; NEWGVN-LABEL: @test1( ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8 -; NEWGVN-NEXT: [[V4:%.*]] = add i8 [[V2]], [[V3]] +; NEWGVN-NEXT: [[V4:%.*]] = add i8 [[TMP1]], [[TMP1]] ; NEWGVN-NEXT: ret i8 [[V4]] ; %V1 = load i32, i32* %P1 @@ -41,11 +40,10 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 -; NEWGVN-NEXT: [[V3:%.*]] = bitcast <2 x i32> [[V1]] to i64 -; NEWGVN-NEXT: [[V4:%.*]] = trunc i64 [[V3]] to i32 -; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[V2]], [[V4]] +; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[TMP1]], [[TMP1]] ; NEWGVN-NEXT: ret i32 [[V5]] ; Entry: @@ -75,10 +73,11 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>* 
; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32 }> poison, <2 x i32> [[V1]], 0 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[V2]], 1 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[TMP1]], 1 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32 }> [[I2]] ; Entry: @@ -171,14 +170,16 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32> +; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>* -; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> poison, <4 x i32> [[V1]], 0 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[V2]], 1 -; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[V3]], 2 -; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[V2]], 3 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[TMP2]], 1 +; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP3]], 2 +; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ 
<4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]] ; Entry: @@ -215,14 +216,13 @@ ; NEWGVN-LABEL: @test7( ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast i32 [[V1]] to float ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to float* -; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: -; NEWGVN-NEXT: ret float [[V2]] +; NEWGVN-NEXT: ret float [[TMP0]] ; NEWGVN: F: -; NEWGVN-NEXT: [[V3:%.*]] = bitcast i32 [[V1]] to float -; NEWGVN-NEXT: ret float [[V3]] +; NEWGVN-NEXT: ret float [[TMP0]] ; Entry: %V1 = load i32, i32* %P1 @@ -306,14 +306,16 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64 +; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32> +; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>* -; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> poison, <4 x i32> [[V1]], 0 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[V2]], 1 -; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[V3]], 2 -; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[V2]], 3 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> 
[[TMP2]], 1 +; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP3]], 2 +; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]] ; Entry: @@ -359,17 +361,19 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32 +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i128 [[TMP0]] to i64 +; NEWGVN-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> poison, <4 x i32> [[V1]], 0 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>* -; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[V2]], 1 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[TMP3]], 1 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32 }> [[I2]] ; NEWGVN: F: ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4 -; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[V3]], 2 +; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[TMP1]], 2 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32 }> [[I3]] ; Entry: @@ -409,13 +413,14 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 +; 
NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to float* -; NEWGVN-NEXT: [[V3:%.*]] = load float, float* [[P3]], align 4 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, float }> poison, <2 x i32> [[V1]], 0 -; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I1]], i32 [[V2]], 1 -; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I2]], float [[V3]], 2 +; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I1]], i32 [[TMP1]], 1 +; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I2]], float [[TMP2]], 2 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32, float }> [[I3]] ; Entry: @@ -444,13 +449,11 @@ ; ; NEWGVN-LABEL: @test12( ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4 -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8 ; NEWGVN-NEXT: store i32 [[V:%.*]], i32* [[P1]], align 4 -; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8 -; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V3]] -; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP1]], [[V5]] +; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i32 [[V]] to i8 +; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]] +; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP2]], [[V5]] ; NEWGVN-NEXT: ret i8 [[V6]] ; %V1 = load i32, i32* %P1 @@ -504,40 +507,23 @@ ; vv ; Exit ; -; OLDGVN-LABEL: @test14( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>* -; OLDGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 -; OLDGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 -; OLDGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; OLDGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] 
-; OLDGVN: T: -; OLDGVN-NEXT: br label [[EXIT:%.*]] -; OLDGVN: F: -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[PHI:%.*]] = phi i32 [ 100, [[T]] ], [ 200, [[F]] ] -; OLDGVN-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1 -; OLDGVN-NEXT: [[V4:%.*]] = add i32 [[TMP1]], [[V2]] -; OLDGVN-NEXT: [[V5:%.*]] = add i32 [[V4]], [[PHI]] -; OLDGVN-NEXT: ret i32 [[V5]] -; -; NEWGVN-LABEL: @test14( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>* -; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 -; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; NEWGVN: T: -; NEWGVN-NEXT: br label [[EXIT:%.*]] -; NEWGVN: F: -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 100, [[T]] ], [ 200, [[F]] ] -; NEWGVN-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1 -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[V3]], [[V2]] -; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[V4]], [[PHI]] -; NEWGVN-NEXT: ret i32 [[V5]] +; GVN-LABEL: @test14( +; GVN-NEXT: Entry: +; GVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>* +; GVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 +; GVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 +; GVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 +; GVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; GVN: T: +; GVN-NEXT: br label [[EXIT:%.*]] +; GVN: F: +; GVN-NEXT: br label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[PHI:%.*]] = phi i32 [ 100, [[T]] ], [ 200, [[F]] ] +; GVN-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1 +; GVN-NEXT: [[V4:%.*]] = add i32 [[TMP1]], [[V2]] +; GVN-NEXT: [[V5:%.*]] = add i32 [[V4]], [[PHI]] +; GVN-NEXT: ret i32 [[V5]] ; Entry: %P1 = bitcast i32* %P to <2 x i32>* diff --git a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll 
b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll @@ -673,13 +673,14 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1 ; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; NEWGVN: Loop: -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY:%.*]] ], [ [[V3]], [[LOOP]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY:%.*]] ], [ [[TMP1]], [[LOOP]] ] ; NEWGVN-NEXT: ret i32 [[PHI]] ; Entry: