Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -791,16 +791,22 @@
   if (!Allowed || !Fast)
     return false;
 
-  // Make sure the Load pointer of type GEP/non-GEP is above insert point
-  Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
-  if (Inst && Inst->getParent() == LI1->getParent() &&
-      !Inst->comesBefore(LOps.RootInsert))
-    Inst->moveBefore(LOps.RootInsert);
-
-  // New load can be generated
+  // Strip every constant offset from LI1's pointer operand so the address can
+  // be rebuilt at the insert point as one i8 GEP on the base pointer, rather
+  // than moving a pointer instruction whose own operands may be defined later.
   Value *Load1Ptr = LI1->getPointerOperand();
+  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
+  Load1Ptr =
+      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
+                                                  /* AllowNonInbounds */ true);
+  // New load can be generated
   Builder.SetInsertPoint(LOps.RootInsert);
-  Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS));
+  // FIXME: Offset1 has the pointer index width but getInt32 keeps only its
+  // low 32 bits. Builder.getInt(Offset1) would preserve the full offset (the
+  // affected test CHECK lines would then need regenerating).
+  Value *NewGEP = Builder.CreateGEP(Builder.getInt8Ty(), Load1Ptr,
+                                    Builder.getInt32(Offset1.getZExtValue()));
+  Value *NewPtr = Builder.CreateBitCast(NewGEP, WiderType->getPointerTo(AS));
   NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
                                       LI1->isVolatile(), "");
   NewLoad->takeName(LI1);
Index: llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
===================================================================
--- llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1869,8 +1869,8 @@
 define i32 @loadCombine_4consecutive_badinsert3(ptr %p) {
 ; LE-LABEL: @loadCombine_4consecutive_badinsert3(
-; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1
+; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]],
align 1 ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_badinsert3( @@ -1928,7 +1928,8 @@ ; LE-NEXT: [[CMP:%.*]] = icmp eq i8 [[C1]], 0 ; LE-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BB2:%.*]] ; LE: bb2: -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1 +; LE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP0]], align 1 ; LE-NEXT: br label [[END]] ; LE: end: ; LE-NEXT: [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[L1]], [[BB2]] ] @@ -2085,3 +2086,82 @@ %o3 = or i32 %o2, %e1 ret i32 %o3 } + +define void @nested_gep(ptr %p, ptr %dest) { +; LE-LABEL: @nested_gep( +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 +; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 +; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; LE-NEXT: ret void +; +; BE-LABEL: @nested_gep( +; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72 +; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4 +; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64 +; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32 +; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 64 +; BE-NEXT: [[FINAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[GEP2]], i64 4 +; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4 +; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64 +; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]] +; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0 +; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32 +; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; BE-NEXT: ret void +; + %gep1 = getelementptr inbounds i8, ptr %p, i64 72 + %ld1 = load i32, ptr %gep1, align 4 + %ld1_zext = zext i32 %ld1 to i64 + %ld1_shl = shl nuw i64 %ld1_zext, 32 + %gep2 = getelementptr inbounds i8, ptr %p, i64 64 + ; Don't move final_ptr before gep2 + %final_ptr = getelementptr inbounds i8, ptr %gep2, 
i64 4 + %ld2 = load i32, ptr %final_ptr, align 4 + %ld2_zext = zext i32 %ld2 to i64 + %or = or i64 %ld1_shl, %ld2_zext + %add = add i64 %or, 0 + %trunc = trunc i64 %add to i32 + store i32 %trunc, ptr %dest, align 4 + ret void +} + + +define void @bitcast_gep(ptr %p, ptr %dest) { +; LE-LABEL: @bitcast_gep( +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 +; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 +; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; LE-NEXT: ret void +; +; BE-LABEL: @bitcast_gep( +; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72 +; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4 +; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64 +; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32 +; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 68 +; BE-NEXT: [[FINAL_PTR:%.*]] = bitcast ptr [[GEP2]] to ptr +; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4 +; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64 +; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]] +; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0 +; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32 +; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; BE-NEXT: ret void +; + %gep1 = getelementptr inbounds i8, ptr %p, i64 72 + %ld1 = load i32, ptr %gep1, align 4 + %ld1_zext = zext i32 %ld1 to i64 + %ld1_shl = shl nuw i64 %ld1_zext, 32 + %gep2 = getelementptr inbounds i8, ptr %p, i64 68 + ; Don't move final_ptr before gep2 + %final_ptr = bitcast ptr %gep2 to ptr + %ld2 = load i32, ptr %final_ptr, align 4 + %ld2_zext = zext i32 %ld2 to i64 + %or = or i64 %ld1_shl, %ld2_zext + %add = add i64 %or, 0 + %trunc = trunc i64 %add to i32 + store i32 %trunc, ptr %dest, align 4 + ret void +} Index: llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll =================================================================== --- 
llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -1682,13 +1682,13 @@ define i32 @loadCombine_4consecutive_rev_mixsize1(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_rev_mixsize1( -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 -; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 -; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P2]], align 1 -; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32 -; LE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 2 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[L2:%.*]] = load i16, ptr [[TMP1]], align 1 +; LE-NEXT: [[TMP2:%.*]] = zext i16 [[L2]] to i32 +; LE-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16 ; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32 -; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[E1]] +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP3]], [[E1]] ; LE-NEXT: ret i32 [[O2]] ; ; BE-LABEL: @loadCombine_4consecutive_rev_mixsize1( @@ -1741,13 +1741,13 @@ ; LE-NEXT: ret i32 [[O2]] ; ; BE-LABEL: @loadCombine_4consecutive_rev_mixsize1_BE( -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 -; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 -; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P2]], align 1 -; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32 +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 2 +; BE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[L2:%.*]] = load i16, ptr [[TMP1]], align 1 +; BE-NEXT: [[TMP2:%.*]] = zext i16 [[L2]] to i32 ; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32 ; BE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16 -; BE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S1]] +; BE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S1]] ; BE-NEXT: ret i32 [[O2]] ; %p2 = getelementptr i8, ptr %p, i32 2 @@ -2005,8 +2005,8 @@ define i32 @loadCombine_4consecutive_badinsert3(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert3( -; LE-NEXT: [[P1:%.*]] = 
getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]], align 1 ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_badinsert3( @@ -2063,7 +2063,8 @@ ; LE-NEXT: [[CMP:%.*]] = icmp eq i8 [[C1]], 0 ; LE-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BB2:%.*]] ; LE: bb2: -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1 +; LE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP0]], align 1 ; LE-NEXT: br label [[END]] ; LE: end: ; LE-NEXT: [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[L1]], [[BB2]] ] @@ -2194,18 +2195,18 @@ define i32 @loadCombine_4consecutive_badinsert6(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert6( ; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 ; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 ; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 ; LE-NEXT: store i8 0, ptr [[P3]], align 1 -; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1 -; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32 -; LE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[L3:%.*]] = load i16, ptr [[TMP1]], align 1 +; LE-NEXT: [[TMP2:%.*]] = zext i16 [[L3]] to i32 +; LE-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16 ; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 ; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 ; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP3]], [[S2]] ; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] ; LE-NEXT: ret i32 [[O3]] ; @@ -2303,3 +2304,82 @@ %o7 = or i64 %s1, %s0 ret i64 %o7 } + +define void @nested_gep(ptr %p, ptr %dest) { +; LE-LABEL: @nested_gep( +; 
LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 +; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 +; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; LE-NEXT: ret void +; +; BE-LABEL: @nested_gep( +; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72 +; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4 +; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64 +; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32 +; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 64 +; BE-NEXT: [[FINAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[GEP2]], i64 4 +; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4 +; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64 +; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]] +; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0 +; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32 +; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; BE-NEXT: ret void +; + %gep1 = getelementptr inbounds i8, ptr %p, i64 72 + %ld1 = load i32, ptr %gep1, align 4 + %ld1_zext = zext i32 %ld1 to i64 + %ld1_shl = shl nuw i64 %ld1_zext, 32 + %gep2 = getelementptr inbounds i8, ptr %p, i64 64 + ; Don't move final_ptr before gep2 + %final_ptr = getelementptr inbounds i8, ptr %gep2, i64 4 + %ld2 = load i32, ptr %final_ptr, align 4 + %ld2_zext = zext i32 %ld2 to i64 + %or = or i64 %ld1_shl, %ld2_zext + %add = add i64 %or, 0 + %trunc = trunc i64 %add to i32 + store i32 %trunc, ptr %dest, align 4 + ret void +} + + +define void @bitcast_gep(ptr %p, ptr %dest) { +; LE-LABEL: @bitcast_gep( +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 +; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 +; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; LE-NEXT: ret void +; +; BE-LABEL: @bitcast_gep( +; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr 
[[P:%.*]], i64 72 +; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4 +; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64 +; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32 +; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 68 +; BE-NEXT: [[FINAL_PTR:%.*]] = bitcast ptr [[GEP2]] to ptr +; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4 +; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64 +; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]] +; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0 +; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32 +; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 +; BE-NEXT: ret void +; + %gep1 = getelementptr inbounds i8, ptr %p, i64 72 + %ld1 = load i32, ptr %gep1, align 4 + %ld1_zext = zext i32 %ld1 to i64 + %ld1_shl = shl nuw i64 %ld1_zext, 32 + %gep2 = getelementptr inbounds i8, ptr %p, i64 68 + ; Don't move final_ptr before gep2 + %final_ptr = bitcast ptr %gep2 to ptr + %ld2 = load i32, ptr %final_ptr, align 4 + %ld2_zext = zext i32 %ld2 to i64 + %or = or i64 %ld1_shl, %ld2_zext + %add = add i64 %or, 0 + %trunc = trunc i64 %add to i32 + store i32 %trunc, ptr %dest, align 4 + ret void +}