diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2025,60 +2025,91 @@ // For constant GEPs, use a more general offset-based folding approach. // Only do this for opaque pointers, as the result element type may change. Type *PtrTy = Src->getType()->getScalarType(); - if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() && + if (PtrTy->isOpaquePointerTy() && (Src->hasOneUse() || Src->hasAllConstantIndices())) { - // Split Src into a variable part and a constant suffix. - gep_type_iterator GTI = gep_type_begin(*Src); - Type *BaseType = GTI.getIndexedType(); - bool IsFirstType = true; - unsigned NumVarIndices = 0; - for (auto Pair : enumerate(Src->indices())) { - if (!isa(Pair.value())) { - BaseType = GTI.getIndexedType(); - IsFirstType = false; - NumVarIndices = Pair.index() + 1; - } - ++GTI; - } + // Try to merge GEP (Back of BB) into Src (Front of BB) first, if that + // fails, swap the two GEP whenever valid and try again. This is valid as + // long as GEP's indices do not depend on Src. + GetElementPtrInst *Back = &GEP; + GetElementPtrInst *Front = cast(Src); + for (int i = 0; i < 2; std::swap(Back, Front), i++) { + // Swap is not valid if GEP's indices depend on Src. For example + // a = gep x, 1 + // b = gep a, func(a), 1 + // Also if Src is used by other instructions than GEP, merging constant- + // indexed Src to non-constant GEP does not reduce instruction count. + if (Front == &GEP && !Src->hasOneUse()) + break; - // Determine the offset for the constant suffix of Src. - APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0); - if (NumVarIndices != Src->getNumIndices()) { - // FIXME: getIndexedOffsetInType() does not handled scalable vectors. - if (isa(BaseType)) - return nullptr; + if (Back->hasAllConstantIndices()) { + // Split Front into a variable part and a constant suffix. + gep_type_iterator GTI = gep_type_begin(Front); + Type *BaseType = GTI.getIndexedType(); + bool IsFirstType = true; + unsigned NumVarIndices = 0; + for (auto Pair : enumerate(Front->indices())) { + if (!isa(Pair.value())) { + BaseType = GTI.getIndexedType(); + IsFirstType = false; + NumVarIndices = Pair.index() + 1; + } + ++GTI; + } - SmallVector ConstantIndices; - if (!IsFirstType) - ConstantIndices.push_back( - Constant::getNullValue(Type::getInt32Ty(GEP.getContext()))); - append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices)); - Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices); - } + // Determine the offset for the constant suffix of Front. + APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0); + if (NumVarIndices != Front->getNumIndices()) { + // FIXME: getIndexedOffsetInType() does not handle scalable vectors. + if (isa(BaseType)) + continue; - // Add the offset for GEP (which is fully constant). - if (!GEP.accumulateConstantOffset(DL, Offset)) - return nullptr; + SmallVector ConstantIndices; + if (!IsFirstType) + ConstantIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(GEP.getContext()))); + append_range(ConstantIndices, + drop_begin(Front->indices(), NumVarIndices)); + Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices); + } - // Convert the total offset back into indices. - SmallVector ConstIndices = - DL.getGEPIndicesForOffset(BaseType, Offset); - if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) - return nullptr; + // Add the offset for Back (which is fully constant). + if (!Back->accumulateConstantOffset(DL, Offset)) + continue; - SmallVector Indices; - append_range(Indices, drop_end(Src->indices(), - Src->getNumIndices() - NumVarIndices)); - for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) - Indices.push_back(ConstantInt::get(GEP.getContext(), Idx)); + APInt OffsetOld = Offset; + // Convert the total offset back into indices. + SmallVector ConstIndices = + DL.getGEPIndicesForOffset(BaseType, Offset); + if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) { + // If both GEP are constant-indexed, and cannot be merged in either + // way, convert them to a GEP of i8. + if (Front == &GEP && Front->hasAllConstantIndices()) { + return isMergedGEPInBounds(*Src, *cast(&GEP)) + ? GetElementPtrInst::CreateInBounds( + Builder.getInt8Ty(), Src->getOperand(0), + Builder.getInt(OffsetOld), GEP.getName()) + : GetElementPtrInst::Create( + Builder.getInt8Ty(), Src->getOperand(0), + Builder.getInt(OffsetOld), GEP.getName()); + } + continue; + } - return isMergedGEPInBounds(*Src, *cast(&GEP)) - ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(), - Src->getOperand(0), Indices, - GEP.getName()) - : GetElementPtrInst::Create(Src->getSourceElementType(), - Src->getOperand(0), Indices, - GEP.getName()); + SmallVector Indices; + append_range(Indices, drop_end(Front->indices(), + Front->getNumIndices() - NumVarIndices)); + for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) + Indices.push_back(ConstantInt::get(GEP.getContext(), Idx)); + + return isMergedGEPInBounds(*Src, *cast(&GEP)) + ? GetElementPtrInst::CreateInBounds( + Front->getSourceElementType(), Src->getOperand(0), + Indices, Back->getName()) + : GetElementPtrInst::Create(Front->getSourceElementType(), + Src->getOperand(0), Indices, + Back->getName()); + } + } } if (Src->getResultElementType() != GEP.getSourceElementType()) diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll --- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll @@ -36,9 +36,8 @@ ; result = (i8*) p + 10 define ptr @mergeReverse(ptr %p) { ; CHECK-LABEL: @mergeReverse( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 1 %2 = getelementptr inbounds i8, ptr %1, i64 2 @@ -70,9 +69,8 @@ ; result = (i8*) p + 14 define ptr @array2(ptr %p, i64 %a) { ; CHECK-LABEL: @array2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [7 x i32], ptr [[P:%.*]], i64 0, i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 14 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds [7 x i32], ptr %p, i64 0, i64 3 %2 = getelementptr inbounds i8, ptr %1, i64 2 @@ -82,9 +80,8 @@ ; result = (i8*) (([3 x i8]*) p + 6) + 2 define ptr @array3(ptr %p) { ; CHECK-LABEL: @array3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP1]], i64 1, i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x i8], ptr [[P:%.*]], i64 6, i64 2 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 2 %2 = getelementptr inbounds [3 x i8], ptr %1, i64 1, i64 1 @@ -94,9 +91,8 @@ ; result = (struct.C*) p + 3 define ptr @struct1(ptr %p) { ; CHECK-LABEL: @struct1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 3 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 3 %2 = getelementptr inbounds %struct.C, ptr %1, i64 1 @@ -127,9 +123,8 @@ ; result = ((struct.C*) p + 1).member2 define ptr @struct4(ptr %p, i64 %a) { ; CHECK-LABEL: @struct4( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 1, i32 2 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 1 %2 = getelementptr inbounds %struct.C, ptr %1, i64 1 @@ -164,9 +159,8 @@ ; result = (i8*) &((struct.A*) &((struct.B*) p)[0].member2).member0 + 2 define ptr @appendIndexReverse(ptr %p) { ; CHECK-LABEL: @appendIndexReverse( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP1]], i64 0, i32 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 2 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 1 %2 = getelementptr inbounds %struct.B, ptr %1, i64 0, i32 1 @@ -178,22 +172,20 @@ ; result = &((struct.C*) p + 2).member1 define ptr @structMemberAliasing(ptr %p, i64 %a) { ; CHECK-LABEL: @structMemberAliasing( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1, i32 2 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 2, i32 1 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 1 %2 = getelementptr inbounds %struct.C, ptr %1, i64 1, i32 2 ret ptr %2 } -; Negative test. Offset of either GEP is not divisible by the other's size. +; GEP cannot be merged either direction, but can be canonicalized as GEP of i8. ; Here i24 is 8-bit aligned. define ptr @notDivisible(ptr %p) { ; CHECK-LABEL: @notDivisible( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i24, ptr %p, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 1 @@ -206,9 +198,8 @@ ; result = (i32*) (([4 x i32]*) p + a) + 3 define ptr @partialConstant1(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstant1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 2 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 [[A:%.*]], i64 3 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds [4 x i32], ptr %1, i64 %a, i64 2 @@ -249,9 +240,8 @@ ; result = &((struct.C*) p + a).member2 define ptr @partialConstantMemberAliasing1(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 2 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds %struct.C, ptr %1, i64 %a, i32 1 diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -211,8 +211,7 @@ define ptr @geps_combinable_different_elem_type4(ptr %a) { ; CHECK-LABEL: @geps_combinable_different_elem_type4( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 10 +; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 14 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1