Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2433,13 +2433,21 @@ // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType); APInt Offset(OffsetBits, 0); - if (!isa(SrcOp) && GEP.accumulateConstantOffset(DL, Offset)) { + + // If the bitcast argument is an allocation, the bitcast is for conversion to + // actual type of allocation. Removing such bitcasts results in having GEPs + // with i8* base and pure byte offsets. That means GEP is not aware of struct + // or array hierarchy. + // By avoiding such GEPs, phi translation and MemoryDependenceAnalysis have + // a better chance to succeed. + if (!isa(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) && + !isAllocationFn(SrcOp, &TLI)) { // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. if (!Offset) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa(SrcOp) || isAllocationFn(SrcOp, &TLI)) { + if (isa(SrcOp)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. 
if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { Index: llvm/test/Transforms/InstCombine/getelementptr.ll =================================================================== --- llvm/test/Transforms/InstCombine/getelementptr.ll +++ llvm/test/Transforms/InstCombine/getelementptr.ll @@ -1273,4 +1273,23 @@ ret i8* %gep } +declare noalias i8* @malloc(i64) nounwind + +define i32 @test_gep_bitcast_malloc(%struct.A* %a) { +; CHECK-LABEL: @test_gep_bitcast_malloc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) +; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.A* +; CHECK-NEXT: [[G3:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.A* [[B]], i64 0, i32 2 +; CHECK-NEXT: [[A_C:%.*]] = load i32, i32* [[G3]], align 4 +; CHECK-NEXT: ret i32 [[A_C]] +; +entry: + %call = call noalias i8* @malloc(i64 16) #2 + %B = bitcast i8* %call to %struct.A* + %g3 = getelementptr %struct.A, %struct.A* %B, i32 0, i32 2 + %a_c = load i32, i32* %g3, align 4 + ret i32 %a_c +} + !0 = !{!"branch_weights", i32 2, i32 10}