diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1483,13 +1483,19 @@
     int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL);
     if (Offset >= 0) {
       // We can coerce a constant load into a load.
-      if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI)))
+      if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI))) {
         if (auto *PossibleConstant =
                 getConstantLoadValueForLoad(C, Offset, LoadType, DL)) {
           LLVM_DEBUG(dbgs() << "Coercing load from load " << *LI
                             << " to constant " << *PossibleConstant << "\n");
           return createConstantExpression(PossibleConstant);
         }
+      } else {
+        // Similarly, we do not create a load expression for the loads that are
+        // optimized with load coercion.
+        const_cast<NewGVN *>(this)->LoadCoercion[LI] = DefiningAccess;
+        return nullptr;
+      }
     }
   } else if (auto *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
     int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL);
@@ -1559,8 +1565,51 @@
                                           DefiningInst, DefiningAccess))
         return CoercionResult;
     }
+  } else {
+    // Here, we try to find loads that can be optimized with load coercion
+    // and that are live on entry.
+    for (const auto &U : DefiningAccess->uses())
+      if (auto *MemUse = dyn_cast<MemoryUse>(U.getUser())) {
+        LoadInst *DependingLoad = dyn_cast<LoadInst>(MemUse->getMemoryInst());
+
+        if (!DependingLoad || LI == DependingLoad)
+          continue;
+
+        // If the two load instructions have the same pointer operand, then
+        // this is not load coercion.
+        if (DependingLoad->getPointerOperand() == LI->getPointerOperand())
+          continue;
+
+        // The DependingLoad should be at least as wide as the load that we
+        // are trying to optimize.
+        if (DL.getTypeSizeInBits(DependingLoad->getType()).getFixedSize() <
+            DL.getTypeSizeInBits(LI->getType()).getFixedSize())
+          continue;
+
+        // If we have already found an optimization opportunity for a load
+        // instruction, then we do not process it again.
+        if (LoadCoercion.count(LI))
+          continue;
+
+        // If the depending load does not have any uses, then we should not do
+        // load coercion because the depending load will be eliminated.
+        if (DependingLoad->getNumUses() == 0)
+          continue;
+
+        // The two loads should be executed in the right order.
+        if (!DT->dominates(DependingLoad, LI))
+          continue;
+
+        performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(), LI,
+                                    DependingLoad,
+                                    getMemoryAccess(DependingLoad));
+      }
   }
 
+  // We do not create a load expression for loads optimized with load coercion.
+  if (LoadCoercion.count(LI))
+    return nullptr;
+
   const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI,
                                         DefiningAccess);
   // If our MemoryLeader is not our defining access, add a use to the
@@ -3890,6 +3939,42 @@
     // Emits the sequence of the instructions that replace the load.
     NewValue = getStoreValueForLoad(Store->getValueOperand(), Offset,
                                     LoadToOptimizeTy, InsertPtr, DL);
+  } else if (LoadInst *Load = dyn_cast<LoadInst>(DependingInsn)) {
+    int Offset = analyzeLoadFromClobberingLoad(
+        LoadToOptimizeTy, LoadToOptimize->getPointerOperand(), Load, DL);
+
+    // If the depending load has already been optimized, then we use the load
+    // that was used to optimize the depending load. In the following example,
+    // %V2 will be optimized by %V1 according to the LoadCoercion map.
+    // Similarly, %V3 will be optimized by %V2.
+    // %P1 = bitcast i8* %P to <4 x i32>*
+    // %V1 = load <4 x i32>, <4 x i32>* %P1
+    // %P2 = bitcast i8* %P to <2 x i32>*
+    // %V2 = load <2 x i32>, <2 x i32>* %P2
+    // %P3 = bitcast i8* %P to i32*
+    // %V3 = load i32, i32* %P3
+    // We first process %V2. Next, we process %V3. Since %V2 has already been
+    // optimized, we use %V1 to optimize %V3. The code after load coercion and
+    // before the elimination phase will be:
+    // %P1 = bitcast i8* %P to <4 x i32>*
+    // %V1 = load <4 x i32>, <4 x i32>* %P1, align 16
+    // %P2 = bitcast i8* %P to <2 x i32>*
+    // %0 = bitcast <4 x i32> %V1 to i128
+    // %1 = trunc i128 %0 to i64
+    // %2 = bitcast i64 %1 to <2 x i32>
+    // %P3 = bitcast i8* %P to i32*
+    // %3 = bitcast <4 x i32> %V1 to i128
+    // %4 = trunc i128 %3 to i32
+    LoadInst *DependingLoad = Load;
+    if (OptimizedLoads.count(DependingLoad)) {
+      while (LoadCoercion.count(DependingLoad))
+        DependingLoad =
+            cast<LoadInst>(cast<MemoryUseOrDef>(LoadCoercion[DependingLoad])
+                               ->getMemoryInst());
+    }
+    // Emits the sequence of the instructions that replace the load.
+    NewValue = getLoadValueForLoad(DependingLoad, Offset, LoadToOptimizeTy,
+                                   InsertPtr, DL);
   }
   OptimizedLoads.insert(LoadToOptimize);
   InstructionsToErase.insert(LoadToOptimize);
@@ -3907,8 +3992,10 @@
       continue;
 
     // Check if the two load instructions have the same type and if the memory
-    // instructions that they depend on have the same memory access.
-    if (LoadToOptimize->getType() == LI->getType() && MA == P.second) {
+    // instructions that they depend on have the same memory access and if LI
+    // is executed after LoadToOptimize.
+    if (LoadToOptimize->getType() == LI->getType() && MA == P.second &&
+        DT->dominates(LoadToOptimize, LI)) {
       OptimizedLoads.insert(LI);
       InstructionsToErase.insert(LI);
       LI->replaceAllUsesWith(NewValue);
diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
--- a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
+++ b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
@@ -13,9 +13,8 @@
 ; NEWGVN-LABEL: @test1(
 ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8*
-; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8
-; NEWGVN-NEXT: [[V4:%.*]] = add i8 [[V2]], [[V3]]
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; NEWGVN-NEXT: [[V4:%.*]] = add i8 [[TMP1]], [[TMP1]]
 ; NEWGVN-NEXT: ret i8 [[V4]]
 ;
   %V1 = load i32, i32* %P1
@@ -42,10 +41,9 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4
-; NEWGVN-NEXT: [[V3:%.*]] = bitcast <2 x i32> [[V1]] to i64
-; NEWGVN-NEXT: [[V4:%.*]] = trunc i64 [[V3]] to i32
-; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[V2]], [[V4]]
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[TMP1]], [[TMP1]]
 ; NEWGVN-NEXT: ret i32 [[V5]]
 ;
 entry:
@@ -76,9 +74,10 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32 }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[V2]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[TMP1]], 1
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32 }> [[I2]]
 ;
 entry:
@@ -142,8 +141,10 @@
 ; NEWGVN-NEXT: [[V0:%.*]] = load i32, i32* [[P0]], align 4
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I1]], i32 [[V0]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I1]], i32 [[TMP1]], 1
 ; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I2]], i32 [[V0]], 2
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32, i32 }> [[I3]]
 ;
@@ -182,13 +183,15 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>*
-; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4
+; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> undef, <4 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[V2]], 1
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[V3]], 2
-; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[V2]], 3
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[TMP2]], 1
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP3]], 2
+; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]]
 ;
 entry:
@@ -220,13 +223,12 @@
 ; NEWGVN-LABEL: @test7(
 ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to float*
-; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4
+; NEWGVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V1]] to float
 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; NEWGVN: T:
-; NEWGVN-NEXT: ret float [[V2]]
+; NEWGVN-NEXT: ret float [[TMP1]]
 ; NEWGVN: F:
-; NEWGVN-NEXT: [[V3:%.*]] = bitcast i32 [[V1]] to float
-; NEWGVN-NEXT: ret float [[V3]]
+; NEWGVN-NEXT: ret float [[TMP1]]
 ;
   %V1 = load i32, i32* %P1
   %P2 = bitcast i32* %P1 to float*
@@ -262,11 +264,13 @@
 ; NEWGVN-NEXT: [[V0:%.*]] = load i32, i32* [[P0]], align 4
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I1]], i32 [[V0]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I1]], i32 [[TMP1]], 1
 ; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I2]], i32 [[V0]], 2
-; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I3]], i32 [[V0]], 3
-; NEWGVN-NEXT: [[I5:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I4]], i32 [[V0]], 4
+; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I3]], i32 [[TMP1]], 3
+; NEWGVN-NEXT: [[I5:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I4]], i32 [[TMP1]], 4
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32, i32, i32, i32 }> [[I5]]
 ;
 entry:
@@ -310,13 +314,16 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>*
-; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4
+; NEWGVN-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+; NEWGVN-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> undef, <4 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[V2]], 1
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[V3]], 2
-; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[V2]], 3
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[TMP2]], 1
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP4]], 2
+; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]]
 ;
 entry:
@@ -359,16 +366,18 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> undef, <4 x i32> [[V1]], 0
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
+; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32
 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; NEWGVN: T:
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>*
-; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[V2]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[TMP2]], 1
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32 }> [[I2]]
 ; NEWGVN: F:
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[V3]], 2
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[TMP3]], 2
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32 }> [[I3]]
 ;
 entry:
@@ -409,12 +418,13 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to float*
-; NEWGVN-NEXT: [[V3:%.*]] = load float, float* [[P3]], align 4
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, float }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I1]], i32 [[V2]], 1
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I2]], float [[V3]], 2
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I1]], i32 [[TMP1]], 1
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I2]], float [[TMP2]], 2
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32, float }> [[I3]]
 ;
 entry:
@@ -443,13 +453,11 @@
 ;
 ; NEWGVN-LABEL: @test12(
 ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4
-; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8*
-; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8
 ; NEWGVN-NEXT: store i32 [[V:%.*]], i32* [[P1]], align 4
-; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8
-; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V3]]
-; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP1]], [[V5]]
+; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i32 [[V]] to i8
+; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]]
+; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP2]], [[V5]]
 ; NEWGVN-NEXT: ret i8 [[V6]]
 ;
   %V1 = load i32, i32* %P1