Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1955,13 +1955,11 @@ // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction with matching element type, combine the // indices of the two getelementptr instructions into a single instruction. - if (Src->getResultElementType() != GEP.getSourceElementType()) - return nullptr; - if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; - if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && + if (Src->getResultElementType() == GEP.getSourceElementType() && + Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && Src->hasOneUse()) { Value *GO1 = GEP.getOperand(1); Value *SO1 = Src->getOperand(1); @@ -2024,6 +2022,68 @@ if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) return nullptr; // Wait until our source is folded to completion. + // For constant GEPs, use a more general offset-based folding approach. + // Only do this for opaque pointers, as the result element type may change. + Type *PtrTy = Src->getType()->getScalarType(); + if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() && + Src->hasOneUse()) { + // Split Src into a variable part and a constant suffix. + gep_type_iterator GTI = gep_type_begin(*Src); + Type *BaseType = GTI.getIndexedType(); + bool IsFirstType = true; + unsigned NumVarIndices = 0; + for (auto Pair : enumerate(Src->indices())) { + if (!isa(Pair.value())) { + BaseType = GTI.getIndexedType(); + IsFirstType = false; + NumVarIndices = Pair.index() + 1; + } + ++GTI; + } + + // Determine the offset for the constant suffix of Src. + APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0); + if (NumVarIndices != Src->getNumIndices()) { + // FIXME: getIndexedOffsetInType() does not handle scalable vectors. 
+ if (isa(BaseType)) + return nullptr; + + SmallVector ConstantIndices; + if (!IsFirstType) + ConstantIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(GEP.getContext()))); + append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices)); + Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices); + } + + // Add the offset for GEP (which is fully constant). + if (!GEP.accumulateConstantOffset(DL, Offset)) + return nullptr; + + // Convert the total offset back into indices. + SmallVector ConstIndices = + DL.getGEPIndicesForOffset(BaseType, Offset); + if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) + return nullptr; + + SmallVector Indices; + append_range(Indices, drop_end(Src->indices(), + Src->getNumIndices() - NumVarIndices)); + for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) + Indices.push_back(ConstantInt::get(GEP.getContext(), Idx)); + + return isMergedGEPInBounds(*Src, *cast(&GEP)) + ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(), + Src->getOperand(0), Indices, + GEP.getName()) + : GetElementPtrInst::Create(Src->getSourceElementType(), + Src->getOperand(0), Indices, + GEP.getName()); + } + + if (Src->getResultElementType() != GEP.getSourceElementType()) + return nullptr; + SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. 
Index: llvm/test/Transforms/InstCombine/opaque-ptr.ll =================================================================== --- llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -181,8 +181,7 @@ define ptr @geps_combinable_different_elem_type1(ptr %a) { ; CHECK-LABEL: @geps_combinable_different_elem_type1( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A2]], i64 0, i32 1 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 @@ -192,8 +191,7 @@ define ptr @geps_combinable_different_elem_type2(ptr %a) { ; CHECK-LABEL: @geps_combinable_different_elem_type2( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 4 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 @@ -203,8 +201,7 @@ define ptr @geps_combinable_different_elem_type3(ptr %a) { ; CHECK-LABEL: @geps_combinable_different_elem_type3( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 8 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 1, i32 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 @@ -225,9 +222,7 @@ define ptr @geps_combinable_different_elem_type5(ptr %a) { ; CHECK-LABEL: @geps_combinable_different_elem_type5( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 -4 -; CHECK-NEXT: ret ptr [[A3]] +; CHECK-NEXT: ret ptr [[A:%.*]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 %a3 = 
getelementptr i8, ptr %a2, i64 -4 @@ -236,8 +231,7 @@ define ptr @geps_combinable_different_elem_type6(ptr %a, i64 %idx) { ; CHECK-LABEL: @geps_combinable_different_elem_type6( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[IDX:%.*]] -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 4 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[IDX:%.*]], i32 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i64 %idx @@ -269,8 +263,7 @@ define ptr @geps_combinable_different_elem_type9(ptr %a, i64 %idx) { ; CHECK-LABEL: @geps_combinable_different_elem_type9( -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds { { i32, i32 } }, ptr [[A:%.*]], i64 [[IDX:%.*]], i32 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 -4 +; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds { { i32, i32 } }, ptr [[A:%.*]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr inbounds { { i32, i32 } }, ptr %a, i64 %idx, i32 0, i32 1