Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1989,9 +1989,6 @@ Indices, "", IsInBounds)); } - if (Src->getResultElementType() != GEP.getSourceElementType()) - return nullptr; - SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. @@ -2001,7 +1998,8 @@ EndsWithSequential = I.isSequential(); // Can we combine the two pointer arithmetics offsets? - if (EndsWithSequential) { + if (Src->getResultElementType() == GEP.getSourceElementType() && + EndsWithSequential) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... Value *SO1 = Src->getOperand(Src->getNumOperands()-1); @@ -2031,12 +2029,31 @@ Indices.append(Src->op_begin()+1, Src->op_end()-1); Indices.push_back(Sum); Indices.append(GEP.op_begin()+2, GEP.op_end()); - } else if (isa(*GEP.idx_begin()) && + } else if (Src->getResultElementType() == GEP.getSourceElementType() && + isa(*GEP.idx_begin()) && cast(*GEP.idx_begin())->isNullValue() && Src->getNumOperands() != 1) { // Otherwise we can do the fold if the first index of the GEP is a zero Indices.append(Src->op_begin()+1, Src->op_end()); Indices.append(GEP.idx_begin()+1, GEP.idx_end()); + } else { + Indices.append(Src->op_begin()+1, Src->op_end()); + + // The indices of GEP may have been folded; in such case try to get + // the types to match by appending zeros to the indices of Src + Type *SrcType = Src->getResultElementType(); + while (SrcType && SrcType->isAggregateType() && + SrcType != GEP.getSourceElementType()) { + Indices.push_back(Builder.getInt32(0)); + SrcType = GetElementPtrInst::getIndexedType( + Src->getSourceElementType(), Indices); + } + if (SrcType == Src->getResultElementType() || + SrcType != GEP.getSourceElementType()) + return nullptr; + + Indices.pop_back(); + Indices.append(GEP.idx_begin(), GEP.idx_end()); } if (!Indices.empty()) Index: llvm/test/Transforms/InstCombine/gepgep-after-fold.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/gepgep-after-fold.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32-S128" + +%s1 = type [3 x [3 x i32]] +%s2 = type [3 x [3 x [3 x i32]]] + +define void @test_unfold_one(ptr %x, i32 %i, i32 %j) { +; CHECK-LABEL: @test_unfold_one( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [3 x [3 x i32]], ptr [[X:%.*]], i32 [[I:%.*]], i32 2, i32 [[J:%.*]] +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %src = getelementptr inbounds %s1, ptr %x, i32 %i, i32 2 + %gep = getelementptr inbounds i32, ptr %src, i32 %j + store i32 1, ptr %gep + ret void +} + +define void @test_unfold_one2(ptr %x, i32 %i, i32 %j) { +; CHECK-LABEL: @test_unfold_one2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [3 x [3 x [3 x i32]]], ptr [[X:%.*]], i32 [[I:%.*]], i32 2, i32 [[J:%.*]] +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %src = getelementptr inbounds %s2, ptr %x, i32 %i, i32 2 + %gep = getelementptr inbounds [3 x i32], ptr %src, i32 %j + store i32 1, ptr %gep + ret void +} + +define void @test_unfold_two(ptr %x, i32 %i, i32 %j) { +; CHECK-LABEL: @test_unfold_two( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [3 x [3 x [3 x i32]]], ptr [[X:%.*]], i32 [[I:%.*]], i32 2, i32 0, i32 [[J:%.*]] +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %src = getelementptr inbounds %s2, ptr %x, i32 %i, i32 2 + %gep = getelementptr inbounds i32, ptr %src, i32 %j + store i32 1, ptr %gep + ret void +} + +define void @test_negative(ptr %x, i32 %i, i32 %j) { +; CHECK-LABEL: @test_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds [3 x [3 x i32]], ptr [[X:%.*]], i32 [[I:%.*]], i32 2 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [3 x i32], ptr [[SRC]], i32 [[J:%.*]] +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %src = getelementptr inbounds %s1, ptr %x, i32 %i, i32 2 + ; should be %gep = getelementptr inbounds [3 x i32], ptr %src, i32 0, i32 %j + %gep = getelementptr inbounds [3 x i32], ptr %src, i32 %j + store i32 1, ptr %gep + ret void +}