Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp =================================================================== --- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -644,6 +644,7 @@ /// shift amount, zero extend type and loadSize. struct LoadOps { LoadInst *Root = nullptr; + LoadInst *InsertPoint = nullptr; bool FoundRoot = false; uint64_t LoadSize = 0; Value *Shift = nullptr; @@ -778,7 +779,9 @@ if (LOps.FoundRoot == false) { LOps.FoundRoot = true; AATags1 = LI1->getAAMetadata(); - } + LOps.InsertPoint = Start; + } else if (LOps.InsertPoint && Start->comesBefore(LOps.InsertPoint)) + LOps.InsertPoint = Start; LOps.LoadSize = LoadSize1 + LoadSize2; // Concatenate the AATags of the Merged Loads. @@ -817,7 +820,7 @@ // New load can be generated Value *Load1Ptr = LI1->getPointerOperand(); - Builder.SetInsertPoint(LI1); + Builder.SetInsertPoint(LOps.InsertPoint); Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS)); NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(), LI1->isVolatile(), ""); Index: llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll =================================================================== --- llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -1772,8 +1772,8 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert( ; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 ; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_badinsert( Index: llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll =================================================================== --- llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -1863,8 +1863,8 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) { ; LE-LABEL: @loadCombine_2consecutive_badinsert( ; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 ; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 ; LE-NEXT: ret i16 [[L1]] ; ; BE-LABEL: @loadCombine_2consecutive_badinsert( @@ -1892,8 +1892,8 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert( ; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 ; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_badinsert(