diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -149,6 +149,7 @@ Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src); + Instruction *swapGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src); Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); Instruction *visitAllocSite(Instruction &FI); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1978,13 +1978,6 @@ } } - // Note that if our source is a gep chain itself then we wait for that - // chain to be resolved before we perform this transformation. This - // avoids us creating a TON of code in some cases. - if (auto *SrcGEP = dyn_cast(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return nullptr; // Wait until our source is folded to completion. - // For constant GEPs, use a more general offset-based folding approach. // Only do this for opaque pointers, as the result element type may change. Type *PtrTy = Src->getType()->getScalarType(); @@ -2125,6 +2118,33 @@ return nullptr; } +Instruction *InstCombinerImpl::swapGEPOfGEP(GetElementPtrInst &GEP, + GEPOperator *Src) { + // If GEP of GEP cannot be combined into one instruction, and the second GEP + // is constant-indexed, we canonicalize by swapping it before the + // non-constant-indexed GEP. This potentially allows the application of some + // optimizations in visitGEPOfGEP. 
+ // Only swap if it doesn't violate use-def rule, and pointer types are + // compatible (opaque ptr or GEP and Src must be same type, meaning they must + // both have 1 index). + if (Src->hasOneUse() && + ((Src->getPointerOperandType()->isOpaquePointerTy() && + GEP.getPointerOperandType()->isOpaquePointerTy()) || + (Src->getNumIndices() == 1 && GEP.getNumIndices() == 1)) && + !Src->hasAllConstantIndices() && GEP.hasAllConstantIndices()) { + // Cannot guarantee inbounds after swapping because the non-const GEP can + // have arbitrary sign. + Value *NewSrc = + Builder.CreateGEP(GEP.getSourceElementType(), Src->getOperand(0), + SmallVector(GEP.indices()), Src->getName()); + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + Src->getSourceElementType(), NewSrc, + SmallVector(Src->indices()), GEP.getName()); + return NewGEP; + } + return nullptr; +} + // Note that we may have also stripped an address space cast in between. Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP) { @@ -2409,9 +2429,12 @@ PtrOp = NewGEP; } - if (auto *Src = dyn_cast(PtrOp)) + if (auto *Src = dyn_cast(PtrOp)) { if (Instruction *I = visitGEPOfGEP(GEP, Src)) return I; + else if (Instruction *I = swapGEPOfGEP(GEP, Src)) + return I; + } // Skip if GEP source element type is scalable. The type alloc size is unknown // at compile-time. 
diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll --- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll @@ -1,12 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s +; RUN: opt < %s -instcombine -licm -opaque-pointers -S | FileCheck %s ; Constant-indexed GEP instructions in a chain of GEP instructions should be -; swapped to the end whenever such transformation is valid. This allows them to -; be merged. - -declare void @use(i1) +; swapped to the front whenever such transformation is valid. This allows them +; to be merged. +declare void @use(ptr) ; The constant-indexed GEP instruction should be swapped to the end, even ; without merging. @@ -24,36 +23,25 @@ ret ptr %3 } -; GEP with the last index being a constant should also be swapped. -define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) { -; CHECK-LABEL: @partialConstant1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] -; CHECK-NEXT: ret ptr [[TMP1]] -; - %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1 - %2 = getelementptr inbounds i32, ptr %p, i64 %b - ret ptr %2 -} - -; Negative test. GEP should not be swapped if the last index is not a constant. -define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) { -; CHECK-LABEL: @partialConstant2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] -; CHECK-NEXT: ret ptr [[TMP1]] +; Negative test. GEP should not be swapped if indices are not constant. 
+define ptr @partialConstant(ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @partialConstant( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a - %2 = getelementptr inbounds i32, ptr %p, i64 %b + %2 = getelementptr inbounds i32, ptr %1, i64 %b ret ptr %2 } -; Constant-indexed GEP are merged after swawpping. +; Constant-indexed GEP are merged after swapping. ; result = ((i32*) p + a) + 3 define ptr @merge(ptr %p, i64 %a) { ; CHECK-LABEL: @merge( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 -; CHECK-NEXT: ret ptr [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 %a @@ -61,67 +49,85 @@ ret ptr %3 } -; Multiple constant-indexed GEP. Note that the first two cannot be merged at -; first, but after the second and third are merged, the result can be merged -; with the first one on the next pass. -; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 9 +; Multiple constant-indexed GEP. All constant-index GEP will eventually be +; swapped to the front and merged. 
+; result = (i16*) (((i8*) p + 25) + a) + b define ptr @nested(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @nested( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1 -; CHECK-NEXT: ret ptr [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 25 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP3]] ; - %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1 + %1 = getelementptr inbounds i64, ptr %p, i64 1 %2 = getelementptr inbounds i8, ptr %1, i64 %a - %3 = mul i64 %a, %b - %4 = getelementptr inbounds <5 x i32>, ptr %2, i64 4 - %5 = getelementptr inbounds i16, ptr %4, i64 %3 - %6 = getelementptr inbounds <4 x i32>, ptr %5, i64 1 - ret ptr %6 + %3 = getelementptr inbounds i32, ptr %2, i64 4 + %4 = getelementptr inbounds i16, ptr %3, i64 %b + %5 = getelementptr inbounds i8, ptr %4, i64 1 + ret ptr %5 } ; It is valid to swap if the source operand of the first GEP has multiple uses. 
-define ptr @multipleUses1(ptr %p) { +define ptr @multipleUses1(ptr %p, i64 %a) { ; CHECK-LABEL: @multipleUses1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] -; CHECK-NEXT: ret ptr [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: ret ptr [[TMP2]] ; - %1 = getelementptr inbounds i32, ptr %p, i64 1 - %2 = ptrtoint ptr %p to i64 - %3 = getelementptr inbounds i32, ptr %1, i64 %2 - ret ptr %3 + %1 = getelementptr inbounds i32, ptr %p, i64 %a + %2 = getelementptr inbounds i32, ptr %1, i64 1 + call void @use(ptr %p) + ret ptr %2 } -; It is valid to swap if the second GEP has multiple uses. +; Negative test. It is not valid to swap if the first GEP has multiple uses. define ptr @multipleUses2(ptr %p, i64 %a) { ; CHECK-LABEL: @multipleUses2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: call void @use(ptr nonnull [[TMP2]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: ret ptr [[TMP2]] ; - %1 = getelementptr inbounds i32, ptr %p, i64 1 - %2 = getelementptr inbounds i32, ptr %1, i64 %a - call void @use(ptr %2) + %1 = getelementptr inbounds i32, ptr %p, i64 %a + %2 = getelementptr inbounds i32, ptr %1, i64 1 + call void @use(ptr %1) ret ptr %2 } -; Negative test. It is not valid to swap if the first GEP has multiple uses. 
-define ptr @multipleUses3(ptr %p) { -; CHECK-LABEL: @multipleUses3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] -; CHECK-NEXT: ret ptr [[TMP3]] +; Test interaction with LICM. +define i64 @licm(ptr %p) { +; CHECK-LABEL: @licm( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P11:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 4 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i64, ptr [[P11]], i64 [[I]] +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[P2]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[LOAD]] +; CHECK-NEXT: [[INEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[I]], 1000000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[ADD_LCSSA]] ; - %1 = getelementptr inbounds i32, ptr %p, i64 1 - %2 = ptrtoint ptr %1 to i64 - %3 = getelementptr inbounds i32, ptr %1, i64 %2 - ret ptr %3 +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %p1 = getelementptr i64, ptr %p, i64 %i + %p2 = getelementptr i64, ptr %p1, i64 4 + %load = load i64, ptr %p2 + %add = add nsw i64 %sum, %load + %inext = add nuw nsw i64 %i, 1 + %exitcond = icmp eq i64 %i, 1000000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i64 %add } diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll --- 
a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -8,10 +8,10 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_EXT2:%.*]] = zext i32 [[CUR_MATCH:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[WIN:%.*]], i64 [[IDX_EXT2]] ; CHECK-NEXT: [[IDX_EXT1:%.*]] = zext i32 [[BEST_LEN:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR25:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR4]], i64 [[IDX_EXT1]] -; CHECK-NEXT: [[ADD_PTR36:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR25]], i64 -1 +; CHECK-NEXT: [[ADD_PTR42:%.*]] = getelementptr i8, i8* [[WIN:%.*]], i64 -1 +; CHECK-NEXT: [[ADD_PTR251:%.*]] = getelementptr i8, i8* [[ADD_PTR42]], i64 [[IDX_EXT2]] +; CHECK-NEXT: [[ADD_PTR36:%.*]] = getelementptr i8, i8* [[ADD_PTR251]], i64 [[IDX_EXT1]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ADD_PTR36]] to i32* ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[TMP1]], [[SCAN_END:%.*]] @@ -20,9 +20,9 @@ ; CHECK-NEXT: br label [[IF_THEN:%.*]] ; CHECK: do.body: ; CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[TMP4:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, i8* [[WIN]], i64 [[IDX_EXT1]] -; CHECK-NEXT: [[ADD_PTR22:%.*]] = getelementptr i8, i8* [[ADD_PTR1]], i64 -1 -; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, i8* [[ADD_PTR22]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR46:%.*]] = getelementptr i8, i8* [[WIN]], i64 -1 +; CHECK-NEXT: [[ADD_PTR25:%.*]] = getelementptr i8, i8* [[ADD_PTR46]], i64 [[IDX_EXT1]] +; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, i8* [[ADD_PTR25]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ADD_PTR3]] to i32* ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], [[SCAN_END]] diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll 
b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll --- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll @@ -126,6 +126,20 @@ ret ptr %2 } +; After canonicalizing, the second GEP is moved to the front, and then merged +; with the first one with rewritten indices. +; result = (i8*) &((struct.A*) &((struct.B*) p)[i].member2).member0 + 2 +define ptr @appendIndexReverse(ptr %p, i64 %i) { +; CHECK-LABEL: @appendIndexReverse( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP1]], i64 [[I:%.*]], i32 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i64, ptr %p, i64 1 + %2 = getelementptr inbounds %struct.B, ptr %1, i64 %i, i32 1 + ret ptr %2 +} + ; Offset of either GEP is not divisible by the other's size, converted to i8* ; and merged. ; Here i24 is 8-bit aligned. @@ -186,8 +200,8 @@ ; address of another member. define ptr @partialConstantMemberAliasing2(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds %struct.C, ptr %p, i64 %a, i32 1 @@ -199,8 +213,8 @@ ; range of the object currently pointed by the non-constant GEP. 
define ptr @partialConstantMemberAliasing3(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 2 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds %struct.C, ptr %p, i64 %a, i32 2 diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -240,8 +240,8 @@ define ptr @geps_combinable_different_elem_type7(ptr %a, i64 %idx) { ; CHECK-LABEL: @geps_combinable_different_elem_type7( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 4 +; CHECK-NEXT: [[A21:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A21]], i64 [[IDX:%.*]], i32 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i64 %idx, i32 1 @@ -251,8 +251,8 @@ define ptr @geps_combinable_different_elem_type8(ptr %a, i64 %idx) { ; CHECK-LABEL: @geps_combinable_different_elem_type8( -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds { { i32, i32 } }, ptr [[A:%.*]], i64 [[IDX:%.*]], i32 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4 +; CHECK-NEXT: [[A21:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { { i32, i32 } }, ptr [[A21]], i64 [[IDX:%.*]], i32 0, i32 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr inbounds { { i32, i32 } }, ptr %a, i64 %idx, i32 0, i32 1 @@ -540,8 +540,8 @@ ; CHECK-NEXT: br label [[JOIN]] ; CHECK: join: ; 
CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 1, [[IF]] ], [ 2, [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP1]] ; CHECK-NEXT: ret ptr [[GEP]] ; br i1 %c, label %if, label %else diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -44,8 +44,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8, !alias.scope !0 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !alias.scope !0 ; CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x double> [[WIDE_LOAD6]], <4 x double> poison, <4 x i32> @@ -56,8 +55,7 @@ ; CHECK-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE8]], <4 x double> poison), !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* 
[[TMP10]], i64 -4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE10]], <4 x double> poison), !alias.scope !3, !noalias !0 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -762,11 +762,9 @@ ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32>