Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -791,8 +791,12 @@
   if (!Allowed || !Fast)
     return false;
 
-  // Make sure the Load pointer of type GEP/non-GEP is above insert point
+  // Nested GEP or BitCast. Should have been handled in InstCombine.
   Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
+  if (Inst && (isa<GetElementPtrInst>(Inst->getOperand(0)) || isa<BitCastInst>(Inst->getOperand(0))))
+    return false;
+
+  // Make sure the Load pointer of type GEP/non-GEP is above insert point
   if (Inst && Inst->getParent() == LI1->getParent() &&
       !Inst->comesBefore(LOps.RootInsert))
     Inst->moveBefore(LOps.RootInsert);
Index: llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
===================================================================
--- llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -2085,3 +2085,102 @@
   %o3 = or i32 %o2, %e1
   ret i32 %o3
 }
+
+define void @nested_gep(ptr %p, ptr %dest) {
+; ALL-LABEL: @nested_gep(
+; ALL-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72
+; ALL-NEXT:    [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; ALL-NEXT:    [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64
+; ALL-NEXT:    [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32
+; ALL-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 64
+; ALL-NEXT:    [[FINAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[GEP2]], i64 4
+; ALL-NEXT:    [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4
+; ALL-NEXT:    [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64
+; ALL-NEXT:    [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]]
+; ALL-NEXT:    [[ADD:%.*]] = add i64 [[OR]], 0
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; ALL-NEXT:    store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; ALL-NEXT:    ret void
+;
+  %gep1 = getelementptr inbounds i8, ptr %p, i64 72
+  %ld1 = load i32, ptr %gep1, align 4
+  %ld1_zext = zext i32 %ld1 to i64
+  %ld1_shl = shl nuw i64 %ld1_zext, 32
+  %gep2 = getelementptr inbounds i8, ptr %p, i64 64
+  ; Don't move final_ptr before gep2
+  %final_ptr = getelementptr inbounds i8, ptr %gep2, i64 4
+  %ld2 = load i32, ptr %final_ptr, align 4
+  %ld2_zext = zext i32 %ld2 to i64
+  %or = or i64 %ld1_shl, %ld2_zext
+  %add = add i64 %or, 0
+  %trunc = trunc i64 %add to i32
+  store i32 %trunc, ptr %dest, align 4
+  ret void
+}
+
+
+define void @bitcast_gep(ptr %p, ptr %dest) {
+; ALL-LABEL: @bitcast_gep(
+; ALL-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72
+; ALL-NEXT:    [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; ALL-NEXT:    [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64
+; ALL-NEXT:    [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32
+; ALL-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 68
+; ALL-NEXT:    [[FINAL_PTR:%.*]] = bitcast ptr [[GEP2]] to ptr
+; ALL-NEXT:    [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4
+; ALL-NEXT:    [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64
+; ALL-NEXT:    [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]]
+; ALL-NEXT:    [[ADD:%.*]] = add i64 [[OR]], 0
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; ALL-NEXT:    store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; ALL-NEXT:    ret void
+;
+  %gep1 = getelementptr inbounds i8, ptr %p, i64 72
+  %ld1 = load i32, ptr %gep1, align 4
+  %ld1_zext = zext i32 %ld1 to i64
+  %ld1_shl = shl nuw i64 %ld1_zext, 32
+  %gep2 = getelementptr inbounds i8, ptr %p, i64 68
+  ; Don't move final_ptr before gep2
+  %final_ptr = bitcast ptr %gep2 to ptr
+  %ld2 = load i32, ptr %final_ptr, align 4
+  %ld2_zext = zext i32 %ld2 to i64
+  %or = or i64 %ld1_shl, %ld2_zext
+  %add = add i64 %or, 0
+  %trunc = trunc i64 %add to i32
+  store i32 %trunc, ptr %dest, align 4
+  ret void
+}
+
+define void @bitcast_gep_typed(i8* %p, i32* %dest) {
+; ALL-LABEL: @bitcast_gep_typed(
+; ALL-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72
+; ALL-NEXT:    [[GEP1_CAST:%.*]] = bitcast ptr [[GEP1]] to ptr
+; ALL-NEXT:    [[LD1:%.*]] = load i32, ptr [[GEP1_CAST]], align 4
+; ALL-NEXT:    [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64
+; ALL-NEXT:    [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32
+; ALL-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 68
+; ALL-NEXT:    [[GEP2_CAST:%.*]] = bitcast ptr [[GEP2]] to ptr
+; ALL-NEXT:    [[LD2:%.*]] = load i32, ptr [[GEP2_CAST]], align 4
+; ALL-NEXT:    [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64
+; ALL-NEXT:    [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]]
+; ALL-NEXT:    [[ADD:%.*]] = add i64 [[OR]], 0
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; ALL-NEXT:    store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; ALL-NEXT:    ret void
+;
+  %gep1 = getelementptr inbounds i8, i8* %p, i64 72
+  %gep1_cast = bitcast i8* %gep1 to i32*
+  %ld1 = load i32, i32* %gep1_cast, align 4
+  %ld1_zext = zext i32 %ld1 to i64
+  %ld1_shl = shl nuw i64 %ld1_zext, 32
+  %gep2 = getelementptr inbounds i8, i8* %p, i64 68
+  ; Don't move gep2_cast before gep2
+  %gep2_cast = bitcast i8* %gep2 to i32*
+  %ld2 = load i32, i32* %gep2_cast, align 4
+  %ld2_zext = zext i32 %ld2 to i64
+  %or = or i64 %ld1_shl, %ld2_zext
+  %add = add i64 %or, 0
+  %trunc = trunc i64 %add to i32
+  store i32 %trunc, i32* %dest, align 4
+  ret void
+}