Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -75,6 +75,10 @@ STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); +static cl::opt<bool> + AggressiveGepMerging("aggr-gep-merging", cl::init(false), cl::Hidden, + cl::desc("Enable GEP merging for most cases.")); + Value *InstCombiner::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(Builder, DL, GEP); } @@ -1432,13 +1436,22 @@ if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) return nullptr; - // Note that if our source is a gep chain itself then we wait for that - // chain to be resolved before we perform this transformation. This - // avoids us creating a TON of code in some cases. - if (GEPOperator *SrcGEP = - dyn_cast<GEPOperator>(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return nullptr; // Wait until our source is folded to completion. + if (AggressiveGepMerging) { + // Note that if our source is a gep chain itself then we wait for that + // chain to be resolved before we perform this transformation. This + // avoids us creating a TON of code in some cases. + if (GEPOperator *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) + return nullptr; // Wait until our source is folded to completion. + } else { + // Don't combine the malformed cycle gep instructions like the following: + // %gep2 = getelementptr i8, i8* %gep, i32 1 + // %gep = getelementptr i8, i8* %gep2, i32 1 + GetElementPtrInst *SrcGEP = + dyn_cast<GetElementPtrInst>(Src->getOperand(0)); + if (SrcGEP == &GEP) + return nullptr; + } SmallVector<Value*, 8> Indices; @@ -1467,10 +1480,16 @@ // normalized. if (SO1->getType() != GO1->getType()) return nullptr; - // Only do the combine when GO1 and SO1 are both constants. 
Only in - // this case, we are sure the cost after the merge is never more than - // that before the merge. - if (!isa<Constant>(GO1) || !isa<Constant>(SO1)) + // If !AggressiveGepMerging, do gep(gep(...)) combine only when + // 1. GO1 and SO1 are both constants or + // 2. Src has only one use and Src and GEP are in the same BB. + // In these cases we are sure the cost of the combined result will be + // equal or less than before. + if (!AggressiveGepMerging && + (!isa<Constant>(GO1) || !isa<Constant>(SO1)) && + (!Src->hasOneUse() || + (isa<Instruction>(Src) && + cast<Instruction>(Src)->getParent() != GEP.getParent()))) return nullptr; Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } Index: test/Transforms/InstCombine/gep-merge1.ll =================================================================== --- test/Transforms/InstCombine/gep-merge1.ll +++ test/Transforms/InstCombine/gep-merge1.ll @@ -0,0 +1,76 @@ +; PR23580 +; RUN: opt < %s -O2 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.anon = type { [0 x %class.C] } +%class.C = type { i8 } +%struct.B = type { i16 } +%class.G = type <{ %struct.F, [2 x i32], i8, [7 x i8] }> +%struct.F = type { i8, i8, i8, i16, i32* } + +@a = global i32 0, align 4 +@d = internal global %struct.anon zeroinitializer, align 1 +declare %struct.B* @_ZN1C5m_fn1Ev(%class.C*) + +; Check geps inside for.body are merged so loop vectorizer can recognize loads +; inside for.body to be inter-iterations consecutive, and generate %wide.loads. 
+; +; CHECK-LABEL: @fn2( +; CHECK: %wide.load{{[0-9]*}} = +; CHECK: %wide.load{{[0-9]*}} = +; CHECK: %wide.load{{[0-9]*}} = + +define void @fn2(%class.G* nocapture readonly %this, i1 zeroext %arg) align 2 { +entry: + br label %for.cond + +for.cond: + %tmp1 = load i32, i32* @a, align 4 + %idxprom = sext i32 %tmp1 to i64 + %arrayidx3 = getelementptr inbounds [0 x %class.C], [0 x %class.C]* getelementptr inbounds (%struct.anon, %struct.anon* @d, i32 0, i32 0), i32 0, i64 %idxprom + %call = call %struct.B* @_ZN1C5m_fn1Ev(%class.C* %arrayidx3) + %tmp4 = load i32, i32* @a, align 4 + %idx.ext = sext i32 %tmp4 to i64 + %add.ptr = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idx.ext + %tobool = trunc i32 %tmp4 to i1 + br i1 %tobool, label %for.cond13, label %if.else30 + +for.cond13: ; preds = %for.body, %if.then + %k.0 = phi i32 [ 1, %for.cond ], [ %add, %for.body ] + %cmp14 = icmp slt i32 %k.0, %tmp4 + br i1 %cmp14, label %for.body, label %if.else30, !llvm.loop !0 + +for.body: ; preds = %for.cond13 + %idxprom15 = sext i32 %k.0 to i64 + %arrayidx16 = getelementptr inbounds %struct.B, %struct.B* %add.ptr, i64 %idxprom15 + %ival = getelementptr inbounds %struct.B, %struct.B* %arrayidx16, i32 0, i32 0 + %tmp9 = load i16, i16* %ival, align 2 + %conv17 = sext i16 %tmp9 to i32 + %add = add nsw i32 %k.0, 1 + %idxprom18 = sext i32 %add to i64 + %arrayidx19 = getelementptr inbounds %struct.B, %struct.B* %add.ptr, i64 %idxprom18 + %ival20 = getelementptr inbounds %struct.B, %struct.B* %arrayidx19, i32 0, i32 0 + %tmp10 = load i16, i16* %ival20, align 2 + %conv21 = sext i16 %tmp10 to i32 + %add22 = add nsw i32 %conv17, %conv21 + %mul = mul nsw i32 %tmp1, %add22 + %add23 = add nsw i32 %tmp4, %mul + %shr = ashr i32 %add23, %tmp4 + %arrayidx25 = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idxprom15 + %ival26 = getelementptr inbounds %struct.B, %struct.B* %arrayidx25, i32 0, i32 0 + %tmp11 = load i16, i16* %ival26, align 2 + %conv27 = sext i16 %tmp11 to i32 + %sub 
= sub nsw i32 %conv27, %shr + %conv28 = trunc i32 %sub to i16 + store i16 %conv28, i16* %ival26, align 2 + br label %for.cond13 + +if.else30: + br label %for.cond +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.width", i32 4} Index: test/Transforms/InstCombine/gep-merge2.ll =================================================================== --- test/Transforms/InstCombine/gep-merge2.ll +++ test/Transforms/InstCombine/gep-merge2.ll @@ -0,0 +1,48 @@ +; This test makes sure that gep(gep ...) merge doesn't come into effect. +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Make sure there are no geps being merged. +; CHECK-LABEL: @fn3( +; CHECK: getelementptr +; CHECK: getelementptr +; CHECK: getelementptr + +@_ZN2cv1aE = global i8* zeroinitializer, align 8 +declare i32 @fn1() #2 +declare i32 @fn2() #2 + +; Function Attrs: uwtable +define linkonce_odr i32 @fn3() { +entry: + %call = call i32 @fn1() + %call1 = call i32 @fn2() + %0 = load i8*, i8** @_ZN2cv1aE, align 8 + %idx.ext2 = sext i32 %call1 to i64 + %add.ptr3 = getelementptr inbounds i8, i8* %0, i64 %idx.ext2 + br label %for.cond5 + +for.cond5: + %total1 = phi i32 [ 0, %entry ], [ %total2, %for.body7 ] + %x.1 = phi i32 [ 0, %entry ], [ %inc, %for.body7 ] + %cmp6 = icmp slt i32 %x.1, %call + br i1 %cmp6, label %for.body7, label %for.cond34 + +for.body7: ; preds = %for.cond5 + %mul = mul nsw i32 %x.1, 2 + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom + %1 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %1 to i32 + %sub = sub nsw i32 %mul, 1 + %idxprom10 = sext i32 %sub to i64 + %arrayidx11 = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom10 + %2 = load i8, i8* %arrayidx11, align 1 + %conv2 = zext i8 %2 to i32 + %add1 = add nsw i32 %conv, %conv2 + %total2 = add nsw i32 %total1, %add1 + %inc = add nsw i32 %x.1, 1 + br label %for.cond5 + +for.cond34: + ret i32 %total1 +} Index: test/Transforms/InstCombine/getelementptr.ll 
=================================================================== --- test/Transforms/InstCombine/getelementptr.ll +++ test/Transforms/InstCombine/getelementptr.ll @@ -104,8 +104,8 @@ %B = getelementptr i32, i32* %A, i64 %D ret i32* %B ; CHECK-LABEL: @test7( -; CHECK: %A = getelementptr i32, i32* %I, i64 %C -; CHECK: %B = getelementptr i32, i32* %A, i64 %D +; CHECK: %A.sum = add i64 %C, %D +; CHECK: getelementptr i32, i32* %I, i64 %A.sum } define i8* @test8([10 x i32]* %X) {