Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -296,6 +296,7 @@ AssumptionCache &AC; TargetLibraryInfo &TLI; DominatorTree &DT; + TargetTransformInfo &TTI; const DataLayout &DL; const SimplifyQuery SQ; OptimizationRemarkEmitter &ORE; @@ -310,11 +311,12 @@ InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, + TargetTransformInfo &TTI , OptimizationRemarkEmitter &ORE, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {} + TTI(TTI), DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {} /// Run the combiner over the entire worklist until it is empty. /// Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -57,6 +57,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -1676,6 +1677,15 @@ I != E; ++I) EndsWithSequential = I.isSequential(); + if (isa(Src) && !Src->hasOneUse() && + TTI.getUserCost(Src) != TargetTransformInfo::TCC_Free) { + BasicBlock *CurBB = GEP.getParent(); + for (User *U : Src->users()) + if (Instruction *UI = dyn_cast(U)) + if (UI->getParent() != CurBB) + return nullptr; + } + // Can we combine the two pointer arithmetics offsets? if (EndsWithSequential) { // Replace: gep (gep %P, long B), long A, ... @@ -3257,6 +3267,7 @@ static bool combineInstructionsOverFunction( Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, + TargetTransformInfo &TTI, OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); @@ -3288,7 +3299,7 @@ MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, - AC, TLI, DT, ORE, DL, LI); + AC, TLI, DT, TTI, ORE, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; if (!IC.run()) @@ -3304,11 +3315,12 @@ auto &DT = AM.getResult(F); auto &TLI = AM.getResult(F); auto &ORE = AM.getResult(F); + auto &TTI = AM.getResult(F); auto *LI = AM.getCachedResult(F); auto *AA = &AM.getResult(F); - if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, + if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, TTI, ORE, ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3329,6 +3341,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -3345,12 +3358,13 @@ auto &TLI = getAnalysis().getTLI(); auto &DT = getAnalysis().getDomTree(); auto &ORE = getAnalysis().getORE(); + auto &TTI = getAnalysis().getTTI(F); // Optional analyses. auto *LIWP = getAnalysisIfAvailable(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, TTI, ORE, ExpensiveCombines, LI); } @@ -3364,6 +3378,7 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) Index: llvm/test/Transforms/InstCombine/unit_skip_gep_merge.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/unit_skip_gep_merge.ll @@ -0,0 +1,28 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +%ST = type {i8*, i8*, i8*} +declare void @bar(i8*) + +; CHECK-LABEL: @foo +; CHECK-LABEL: entry +; CHECK: %tmp1 = getelementptr inbounds %ST, %ST* %arrayidx8 +; CHECK-LABEL: BB0 +; CHECK: %tmp2 = getelementptr inbounds %ST, %ST* %arrayidx8 +define void @foo(%ST* %B, i1 %c, i64 %v, i8** %S, i8** %S2) { +entry: + %arrayidx8 = getelementptr inbounds %ST, %ST* %B, i64 %v + %tmp1 = getelementptr inbounds %ST, %ST* %arrayidx8, i64 0, i32 1 + %r = load i8*, i8** %tmp1 + store i8* %r, i8** %S2 + br i1 %c, label %BB0, label %BB2 + +BB0: + %tmp2 = getelementptr inbounds %ST, %ST* %arrayidx8, i64 0, i32 2 + %l = load i8*, i8** %tmp2 + store i8* %l, i8** %S + br label %BB2 + +BB2: + ret void +} +