diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1958,6 +1958,12 @@ if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; + // LICM takes priority over canonicalization swapping for merging constant- + // indexed GEP, because mergable constant-indexed GEPs can still be merged + // after they are hoisted out of the loop, but performing canonicalization + // first may miss the LICM opportunity. + bool shouldCanonicalizeSwap = true; + if (Src->getResultElementType() == GEP.getSourceElementType() && Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && Src->hasOneUse()) { @@ -1967,6 +1973,13 @@ if (LI) { // Try to reassociate loop invariant GEP chains to enable LICM. if (Loop *L = LI->getLoopFor(GEP.getParent())) { + // If SO1 is invariant and GO1 is variant, they should not be swapped by + // canonicalization, otherwise this will go into an infinite loop with + // the swapping below. + if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1)) { + shouldCanonicalizeSwap = false; + } + // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. @@ -2015,12 +2028,43 @@ } } - // Note that if our source is a gep chain itself then we wait for that - // chain to be resolved before we perform this transformation. This - // avoids us creating a TON of code in some cases. - if (auto *SrcGEP = dyn_cast(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return nullptr; // Wait until our source is folded to completion. + // Handle malformed GEP of GEP in PR13621, where Src and GEP mutually use each + // other. This is normally not allowed, so just bail out. + if (llvm::any_of(GEP.users(), [&](User *U) { + if (Instruction *SrcI = dyn_cast(Src)) { + return U == SrcI || DT.dominates(U, SrcI); + } + return false; + })) + return nullptr; + + // Canonicalize swapping. Swap GEP with constant index suffix to the back if + // it doesn't violate def-use relations or contradict with loop invariant + // swap above. This allows more potential applications of constant-indexed GEP + // optimizations below. + if (shouldCanonicalizeSwap && Src->hasOneUse() && + (Src->getType()->getScalarType()->isOpaquePointerTy() || + Src->getPointerOperand()->getType() == + GEP.getPointerOperand()->getType())) { + // When swapping, GEP with all constant indices are more prioritized than + // GEP with only the last few indices (but not all) being constant because + // it may be merged with GEP with all constant indices. + if ((isa(*(Src->indices().end() - 1)) && + !isa(*(GEP.indices().end() - 1))) || + (Src->hasAllConstantIndices() && !GEP.hasAllConstantIndices())) { + bool InBounds = isMergedGEPInBounds(*Src, *cast(&GEP)); + Value *NewSrc = Builder.CreateGEP( + GEP.getSourceElementType(), Src->getOperand(0), + SmallVector(GEP.indices()), Src->getName()); + if (GetElementPtrInst *NewGEP = dyn_cast(NewSrc)) + NewGEP->setIsInBounds(InBounds); + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + Src->getSourceElementType(), NewSrc, + SmallVector(Src->indices()), GEP.getName()); + NewGEP->setIsInBounds(InBounds); + return NewGEP; + } + } // For constant GEPs, use a more general offset-based folding approach. // Only do this for opaque pointers, as the result element type may change. @@ -2060,11 +2104,23 @@ if (!GEP.accumulateConstantOffset(DL, Offset)) return nullptr; + APInt OffsetOld = Offset; // Convert the total offset back into indices. SmallVector ConstIndices = DL.getGEPIndicesForOffset(BaseType, Offset); - if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) + if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) { + // If both GEP are constant-indexed, and cannot be merged in either way, + // convert them to a GEP of i8. + if (Src->hasAllConstantIndices()) + return isMergedGEPInBounds(*Src, *cast(&GEP)) + ? GetElementPtrInst::CreateInBounds( + Builder.getInt8Ty(), Src->getOperand(0), + Builder.getInt(OffsetOld), GEP.getName()) + : GetElementPtrInst::Create( + Builder.getInt8Ty(), Src->getOperand(0), + Builder.getInt(OffsetOld), GEP.getName()); return nullptr; + } SmallVector Indices; append_range(Indices, drop_end(Src->indices(), diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll --- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll @@ -12,9 +12,9 @@ ; result = (((i32*) p + a) + b) + 1 define ptr @basic(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @basic( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 ; CHECK-NEXT: ret ptr [[TMP3]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 @@ -26,8 +26,8 @@ ; GEP with the last index being a constant should also be swapped. define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @partialConstant1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 [[A:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 1 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1 @@ -51,10 +51,9 @@ ; result = ((i32*) p + a) + 3 define ptr @merge(ptr %p, i64 %a) { ; CHECK-LABEL: @merge( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 -; CHECK-NEXT: ret ptr [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3 +; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 %a @@ -65,16 +64,14 @@ ; Multiple constant-indexed GEP. Note that the first two cannot be merged at ; first, but after the second and third are merged, the result can be merged ; with the first one on the next pass. -; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 9 +; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 10 define ptr @nested(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @nested( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1 -; CHECK-NEXT: ret ptr [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <3 x i32>, ptr [[TMP3]], i64 10 +; CHECK-NEXT: ret ptr [[TMP4]] ; %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1 %2 = getelementptr inbounds i8, ptr %1, i64 %a @@ -88,9 +85,9 @@ ; It is valid to swap if the source operand of the first GEP has multiple uses. define ptr @multipleUses1(ptr %p) { ; CHECK-LABEL: @multipleUses1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 ; CHECK-NEXT: ret ptr [[TMP3]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 @@ -102,8 +99,8 @@ ; It is valid to swap if the second GEP has multiple uses. define ptr @multipleUses2(ptr %p, i64 %a) { ; CHECK-LABEL: @multipleUses2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 ; CHECK-NEXT: call void @use(ptr nonnull [[TMP2]]) ; CHECK-NEXT: ret ptr [[TMP2]] ; @@ -131,8 +128,8 @@ ; after swapping. define ptr @inbounds(ptr %p, i64 %a) { ; CHECK-LABEL: @inbounds( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll --- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll @@ -56,9 +56,8 @@ ; result = (i8*) p + 14 define ptr @array2(ptr %p, i64 %a) { ; CHECK-LABEL: @array2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [7 x i32], ptr [[P:%.*]], i64 0, i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 14 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds [7 x i32], ptr %p, i64 0, i64 3 %2 = getelementptr inbounds i8, ptr %1, i64 2 @@ -68,9 +67,8 @@ ; result = (i8*) p + 20 define ptr @array3(ptr %p) { ; CHECK-LABEL: @array3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP1]], i64 1, i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 20 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 2 %2 = getelementptr inbounds [3 x i8], ptr %1, i64 1, i64 1 @@ -80,9 +78,8 @@ ; result = (i8*) p + 36 define ptr @struct1(ptr %p) { ; CHECK-LABEL: @struct1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 36 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 3 %2 = getelementptr inbounds %struct.C, ptr %1, i64 1 @@ -141,9 +138,8 @@ ; result = &((struct.C*) p + 2).member1 define ptr @structMemberAliasing(ptr %p, i64 %a) { ; CHECK-LABEL: @structMemberAliasing( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1, i32 2 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 28 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i64, ptr %p, i64 1 %2 = getelementptr inbounds %struct.C, ptr %1, i64 1, i32 2 @@ -154,9 +150,8 @@ ; 8-bit aligned. define ptr @notDivisible(ptr %p) { ; CHECK-LABEL: @notDivisible( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i24, ptr %p, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 1 @@ -169,9 +164,8 @@ ; result = (i32*) (([4 x i32]*) p + a) + 3 define ptr @partialConstant1(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstant1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 2 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 [[A:%.*]], i64 3 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds [4 x i32], ptr %1, i64 %a, i64 2 @@ -182,8 +176,8 @@ ; constant offset is not divisible. define ptr @partialConstant2(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstant2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i64], ptr [[P:%.*]], i64 [[A:%.*]], i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 @@ -212,9 +206,8 @@ ; result = &((struct.C*) p + a).member2 define ptr @partialConstantMemberAliasing1(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 2 +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds %struct.C, ptr %1, i64 %a, i32 1 @@ -225,8 +218,8 @@ ; address of another member. define ptr @partialConstantMemberAliasing2(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 1 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i8, ptr %p, i64 1 @@ -238,8 +231,8 @@ ; range of the object currently pointed by the non-constant GEP. define ptr @partialConstantMemberAliasing3(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -211,8 +211,7 @@ define ptr @geps_combinable_different_elem_type4(ptr %a) { ; CHECK-LABEL: @geps_combinable_different_elem_type4( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 10 +; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 14 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -1746,10 +1746,10 @@ define void @ashr_out_of_range_1(i177* %A) { ; CHECK-LABEL: @ashr_out_of_range_1( ; CHECK-NEXT: [[L:%.*]] = load i177, i177* [[A:%.*]], align 4 -; CHECK-NEXT: [[G11:%.*]] = getelementptr i177, i177* [[A]], i64 -1 ; CHECK-NEXT: [[B24_LOBIT:%.*]] = ashr i177 [[L]], 175 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i177 [[B24_LOBIT]] to i64 -; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, i177* [[G11]], i64 [[TMP1]] +; CHECK-NEXT: [[G111:%.*]] = getelementptr i177, i177* [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, i177* [[G111]], i64 -1 ; CHECK-NEXT: store i177 0, i177* [[G62]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -44,8 +44,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8, !alias.scope !0 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !alias.scope !0 ; CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x double> [[WIDE_LOAD6]], <4 x double> poison, <4 x i32> @@ -56,8 +55,7 @@ ; CHECK-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE8]], <4 x double> poison), !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE10]], <4 x double> poison), !alias.scope !3, !noalias !0 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -762,11 +762,9 @@ ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[VEC_IND]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32>