Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2031,11 +2031,15 @@
     if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
       return nullptr;   // Wait until our source is folded to completion.
 
+  // Only try to combine GEPs if the source has one use or is a constant GEP,
+  // otherwise we will duplicate index arithmetic.
+  if (!Src->hasOneUse() && !Src->hasAllConstantIndices())
+    return nullptr;
+
   // For constant GEPs, use a more general offset-based folding approach.
   // Only do this for opaque pointers, as the result element type may change.
   Type *PtrTy = Src->getType()->getScalarType();
-  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
-      (Src->hasOneUse() || Src->hasAllConstantIndices())) {
+  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices()) {
     // Split Src into a variable part and a constant suffix.
     gep_type_iterator GTI = gep_type_begin(*Src);
     Type *BaseType = GTI.getIndexedType();
Index: llvm/test/Transforms/InstCombine/gepphigep.ll
===================================================================
--- llvm/test/Transforms/InstCombine/gepphigep.ll
+++ llvm/test/Transforms/InstCombine/gepphigep.ll
@@ -54,7 +54,7 @@
 ; CHECK-NEXT:    store i32 0, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP1]], i64 [[TMP19:%.*]]
 ; CHECK-NEXT:    store i32 0, ptr [[TMP20]], align 4
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP1]], i64 [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP10]], i64 0, i32 1
 ; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP25]]
 ;
Index: llvm/test/Transforms/InstCombine/getelementptr.ll
===================================================================
--- llvm/test/Transforms/InstCombine/getelementptr.ll
+++ llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1285,7 +1285,7 @@
 ; CHECK-LABEL: @gep_of_gep_multiuse_var_and_const(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr { i32, i32 }, ptr [[P:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    call void @use(ptr [[GEP1]])
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr { i32, i32 }, ptr [[P]], i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr { i32, i32 }, ptr [[GEP1]], i64 0, i32 1
 ; CHECK-NEXT:    ret ptr [[GEP2]]
 ;
   %gep1 = getelementptr { i32, i32 }, ptr %p, i64 %idx
@@ -1298,7 +1298,7 @@
 ; CHECK-LABEL: @gep_of_gep_multiuse_var_and_var(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr [4 x i32], ptr [[P:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    call void @use(ptr [[GEP1]])
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr [4 x i32], ptr [[P]], i64 [[IDX]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr [4 x i32], ptr [[GEP1]], i64 0, i64 [[IDX2:%.*]]
 ; CHECK-NEXT:    ret ptr [[GEP2]]
 ;
   %gep1 = getelementptr [4 x i32], ptr %p, i64 %idx
Index: llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
@@ -18,12 +18,12 @@
 ; O1-NEXT:  entry:
 ; O1-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], ptr [[FVP]], i64 0, i32 1
 ; O1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; O1-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef, i32 1
+; O1-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
+; O1-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[ARRAYIDX_I_I]], i64 0, i32 1
 ; O1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
 ; O1-NEXT:    [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; O1-NEXT:    br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
 ; O1:       for.body7.lr.ph.i:
-; O1-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP0]], i64 undef
 ; O1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; O1-NEXT:    [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef
 ; O1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]]
@@ -48,12 +48,12 @@
 ; O23-NEXT:  entry:
 ; O23-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], ptr [[FVP]], i64 0, i32 1
 ; O23-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; O23-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef, i32 1
+; O23-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
+; O23-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[ARRAYIDX_I_I]], i64 0, i32 1
 ; O23-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
 ; O23-NEXT:    [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; O23-NEXT:    br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
 ; O23:      for.body7.lr.ph.i:
-; O23-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP0]], i64 undef
 ; O23-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; O23-NEXT:    [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef
 ; O23-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]]
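
Note: the getelementptr.ll tests above are the smallest illustration of the case the new bail-out guards against. A minimal self-contained sketch in LLVM IR, adapted from @gep_of_gep_multiuse_var_and_const (the define wrapper and the input form of %gep2 are assumptions reconstructed from the CHECK lines; the hunk only shows the %gep1 input line):

declare void @use(ptr)

define ptr @gep_of_gep_multiuse_var_and_const(ptr %p, i64 %idx) {
  ; %gep1 has a variable index and a second user (@use), so it stays
  ; live no matter what happens to %gep2.
  %gep1 = getelementptr { i32, i32 }, ptr %p, i64 %idx
  call void @use(ptr %gep1)
  ; Assumed input form. Before this patch, instcombine merged this into
  ;   getelementptr { i32, i32 }, ptr %p, i64 %idx, i32 1
  ; recomputing the %idx scaling; with the patch it stays relative to %gep1.
  %gep2 = getelementptr { i32, i32 }, ptr %gep1, i64 0, i32 1
  ret ptr %gep2
}

Because %gep1 must be materialized anyway for @use, folding %gep2 back onto %p duplicates the variable-index arithmetic; the new hasOneUse()/hasAllConstantIndices() check keeps the second GEP addressed off the first instead.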