diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1943,6 +1943,14 @@
   if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
     return nullptr;
 
+  // LICM moves a GEP with constant indices to the front, while canonicalization
+  // swaps it to the back of a non-constant GEP. If both transformations can be
+  // applied, LICM takes priority because it generally reduces the instruction
+  // count in the loop body, whereas performing the canonicalization swap first
+  // defeats the LICM opportunity without necessarily reducing the instruction
+  // count.
+  bool ShouldCanonicalizeSwap = true;
+
   if (Src->getResultElementType() == GEP.getSourceElementType() &&
       Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
       Src->hasOneUse()) {
@@ -1952,6 +1960,12 @@
     if (LI) {
       // Try to reassociate loop invariant GEP chains to enable LICM.
       if (Loop *L = LI->getLoopFor(GEP.getParent())) {
+        // If SO1 is invariant and GO1 is variant, do not swap them via
+        // canonicalization even if it is applicable; otherwise the LICM swap
+        // below undoes it on the next iteration, causing an infinite loop.
+        if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1))
+          ShouldCanonicalizeSwap = false;
+
         // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
         // invariant: this breaks the dependence between GEPs and allows LICM
         // to hoist the invariant part out of the loop.
@@ -1976,12 +1990,31 @@
     }
   }
 
-  // Note that if our source is a gep chain itself then we wait for that
-  // chain to be resolved before we perform this transformation. This
-  // avoids us creating a TON of code in some cases.
-  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
-    if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
-      return nullptr; // Wait until our source is folded to completion.
+  // Canonicalize by swapping: move the GEP with a constant index suffix to
+  // the back when doing so neither violates def-use relations nor contradicts
+  // the loop-invariant swap above. This enables more applications of the
+  // constant-indexed GEP optimizations below.
+  if (ShouldCanonicalizeSwap && Src->hasOneUse() &&
+      Src->getPointerOperandType() == GEP.getPointerOperandType() &&
+      Src->getType()->isVectorTy() == GEP.getType()->isVectorTy() &&
+      !isa<GlobalVariable>(Src->getPointerOperand())) {
+    // When swapping, a GEP with all-constant indices takes priority over one
+    // where only the last few (but not all) indices are constant, because the
+    // former may later merge with another all-constant-index GEP.
+    if ((isa<ConstantInt>(*(Src->indices().end() - 1)) &&
+         !isa<ConstantInt>(*(GEP.indices().end() - 1))) ||
+        (Src->hasAllConstantIndices() && !GEP.hasAllConstantIndices())) {
+      // Cannot guarantee inbounds after swapping because the non-constant GEP
+      // index can have arbitrary sign.
+      Value *NewSrc = Builder.CreateGEP(
+          GEP.getSourceElementType(), Src->getOperand(0),
+          SmallVector<Value *>(GEP.indices()), Src->getName());
+      GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+          Src->getSourceElementType(), NewSrc,
+          SmallVector<Value *>(Src->indices()), GEP.getName());
+      return NewGEP;
+    }
+  }
 
   // For constant GEPs, use a more general offset-based folding approach.
   // Only do this for opaque pointers, as the result element type may change.
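To make the new canonicalization concrete, here is a minimal before/after sketch in LLVM IR (value names invented for illustration; it mirrors the @basic test below). Note that inbounds cannot be preserved on the swapped pair, since the variable index may be negative:

  ; before: the constant-indexed GEP feeds the variable-indexed one
  %c = getelementptr inbounds i32, ptr %p, i64 1
  %v = getelementptr inbounds i32, ptr %c, i64 %a
  ; after: constant-indexed GEP swapped to the back, inbounds dropped
  %v.new = getelementptr i32, ptr %p, i64 %a
  %c.new = getelementptr i32, ptr %v.new, i64 1

%c.new computes the same address %v did, and the trailing constant-indexed GEP can now merge with any adjacent constant-indexed GEP.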
diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
--- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -1,21 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s
+; RUN: opt < %s -passes='require<loops>,instcombine' -opaque-pointers -S | FileCheck %s
 
 ; Constant-indexed GEP instructions in a chain of GEP instructions should be
 ; swapped to the end whenever such transformation is valid. This allows them to
 ; be merged.
 
-declare void @use(i1)
-
 ; The constant-indexed GEP instruction should be swapped to the end, even
 ; without merging.
-; result = (((ptr) p + a) + b) + 1
+; result = (((i32*) p + a) + b) + 1
 define ptr @basic(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @basic(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    ret ptr [[TMP3]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -27,33 +25,34 @@
 ; GEP with the last index being a constant should also be swapped.
 define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @partialConstant1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
-; CHECK-NEXT:    ret ptr [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 1
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1
-  %2 = getelementptr inbounds i32, ptr %p, i64 %b
+  %2 = getelementptr inbounds i32, ptr %1, i64 %b
   ret ptr %2
 }
 
 ; Negative test. GEP should not be swapped if the last index is not a constant.
 define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @partialConstant2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
-; CHECK-NEXT:    ret ptr [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a
-  %2 = getelementptr inbounds i32, ptr %p, i64 %b
+  %2 = getelementptr inbounds i32, ptr %1, i64 %b
   ret ptr %2
 }
 
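An aside on the pair of partialConstant tests above: a multi-index GEP takes part in the swap only when its trailing index is constant, because only a trailing constant offset can later fold into an adjacent constant-indexed GEP. The contrast, taken directly from the two tests:

  %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1  ; swapped: last index is constant
  %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a  ; kept: last index is variable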
-; Constant-indexed GEP are merged after swawpping.
-; result = ((ptr) p + a) + 3
+; Constant-indexed GEPs are merged after swapping.
+; result = ((i32*) p + a) + 3
 define ptr @merge(ptr %p, i64 %a) {
 ; CHECK-LABEL: @merge(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 3
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
   %2 = getelementptr inbounds i32, ptr %1, i64 %a
@@ -64,16 +63,14 @@
 ; Multiple constant-indexed GEP. Note that the first two cannot be merged at
 ; first, but after the second and third are merged, the result can be merged
 ; with the first one on the next pass.
-; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
+; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 10
 define ptr @nested(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @nested(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <3 x i32>, ptr [[TMP3]], i64 10
+; CHECK-NEXT:    ret ptr [[TMP4]]
 ;
   %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
   %2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -87,9 +84,9 @@
 ; It is valid to swap if the source operand of the first GEP has multiple uses.
 define ptr @multipleUses1(ptr %p) {
 ; CHECK-LABEL: @multipleUses1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    ret ptr [[TMP3]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -98,23 +95,9 @@
   ret ptr %3
 }
 
-; It is valid to swap if the second GEP has multiple uses.
-define ptr @multipleUses2(ptr %p, i64 %a) {
-; CHECK-LABEL: @multipleUses2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    call void @use(ptr nonnull [[TMP2]])
-; CHECK-NEXT:    ret ptr [[TMP2]]
-;
-  %1 = getelementptr inbounds i32, ptr %p, i64 1
-  %2 = getelementptr inbounds i32, ptr %1, i64 %a
-  call void @use(ptr %2)
-  ret ptr %2
-}
-
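For reference, the fold in @merge above takes two instcombine iterations; a sketch of the intermediate state after the swap (value names invented for illustration):

  %v = getelementptr i32, ptr %p, i64 %a    ; variable-indexed GEP now first
  %c1 = getelementptr i32, ptr %v, i64 1    ; swapped constant GEP...
  %c2 = getelementptr i32, ptr %c1, i64 2   ; ...now adjacent to the other constant GEP
  ; the next iteration merges the trailing pair: 1 + 2 = 3
  %r = getelementptr i32, ptr %v, i64 3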
 ; Negative test. It is not valid to swap if the first GEP has multiple uses.
-define ptr @multipleUses3(ptr %p) {
-; CHECK-LABEL: @multipleUses3(
+define ptr @multipleUses2(ptr %p) {
+; CHECK-LABEL: @multipleUses2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
@@ -125,3 +108,40 @@
   %3 = getelementptr inbounds i32, ptr %1, i64 %2
   ret ptr %3
 }
+
+; Negative test. LICM should take priority over canonicalization, so the first
+; GEP should not be swapped, even if it contains a constant index.
+define i64 @licm(ptr %p) {
+; CHECK-LABEL: @licm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i64, ptr [[P1]], i64 [[I]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT:    [[ADD]] = add nsw i64 [[SUM]], [[LOAD]]
+; CHECK-NEXT:    [[INEXT]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[I]], 1000000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i64 [[ADD]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
+  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %p1 = getelementptr i64, ptr %p, i64 4
+  %p2 = getelementptr i64, ptr %p1, i64 %i
+  %load = load i64, ptr %p2
+  %add = add nsw i64 %sum, %load
+  %inext = add nuw nsw i64 %i, 1
+  %exitcond = icmp eq i64 %i, 1000000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i64 %add
+}
diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
--- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -9,7 +9,7 @@
 %struct.B = type { i8, [3 x i16], %struct.A, float }
 %struct.C = type { i8, i32, i32 }
 
-; result = (ptr) p + 3
+; result = (i32*) p + 3
 define ptr @mergeBasic(ptr %p) {
 ; CHECK-LABEL: @mergeBasic(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
@@ -20,8 +20,8 @@
   ret ptr %2
 }
 
-; Converted to ptr and merged.
-; result = (ptr) p + 10
+; Converted to i8* and merged.
+; result = (i8*) p + 10
 define ptr @mergeDifferentTypes(ptr %p) {
 ; CHECK-LABEL: @mergeDifferentTypes(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
@@ -32,8 +32,8 @@
   ret ptr %2
 }
 
-; Converted to ptr and merged.
-; result = (ptr) p + 10
+; Converted to i8* and merged.
+; result = (i8*) p + 10
 define ptr @mergeReverse(ptr %p) {
 ; CHECK-LABEL: @mergeReverse(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
@@ -55,7 +55,7 @@
   ret ptr %2
 }
 
-; result = (ptr) ((ptr) p + 1) + 17
+; result = (i8*) (([20 x i8]*) p + 1) + 17
 define ptr @array1(ptr %p) {
 ; CHECK-LABEL: @array1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17
@@ -66,8 +66,8 @@
   ret ptr %2
 }
 
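Returning to the @licm test above: it pins down why the C++ hunk computes ShouldCanonicalizeSwap at all. Without that guard, the two rewrites would ping-pong on every iteration; a sketch using the GEPs from that test:

  ; the canonicalization swap would produce:
  %p2 = getelementptr i64, ptr %p, i64 %i
  %p1 = getelementptr i64, ptr %p2, i64 4
  ; ...which the loop-invariant reassociation would rewrite straight back to:
  %p1 = getelementptr i64, ptr %p, i64 4
  %p2 = getelementptr i64, ptr %p1, i64 %i

Keeping the constant-indexed GEP in front leaves %p1 loop-invariant, so LICM can hoist it out of the loop body.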
-; Converted to ptr and merged.
-; result = (ptr) p + 20
+; Converted to i8* and merged.
+; result = (i8*) p + 20
 define ptr @array2(ptr %p) {
 ; CHECK-LABEL: @array2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 20
@@ -78,8 +78,8 @@
   ret ptr %2
 }
 
-; Converted to ptr and merged.
-; result = (ptr) p + 36
+; Converted to i8* and merged.
+; result = (i8*) p + 36
 define ptr @struct1(ptr %p) {
 ; CHECK-LABEL: @struct1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 36
@@ -101,7 +101,7 @@
   ret ptr %2
 }
 
-; result = (ptr) &((struct.B) p)[0].member2.member0 + 7
+; result = (i8*) &((struct.B*) p)[0].member2.member0 + 7
 define ptr @structStruct(ptr %p) {
 ; CHECK-LABEL: @structStruct(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7
@@ -115,7 +115,7 @@
 ; First GEP offset is not divisible by last GEP's source element size, but first
 ; GEP points to an array such that the last GEP offset is divisible by the
 ; array's element size, so the first GEP can be rewritten with an extra index.
-; result = (ptr) &((struct.B*) p)[i].member1 + 2
+; result = (i16*) &((struct.B*) p)[i].member1 + 2
 define ptr @appendIndex(ptr %p, i64 %i) {
 ; CHECK-LABEL: @appendIndex(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 1, i64 2
@@ -126,10 +126,23 @@
   ret ptr %2
 }
 
-; Offset of either GEP is not divisible by the other's size, converted to ptr
+; After canonicalizing, the second GEP is moved to the front, and the two are
+; then merged with rewritten indices.
+; result = (i8*) &((struct.A*) &((struct.B*) p)[i].member2).member0 + 2
+define ptr @appendIndexReverse(ptr %p, i64 %i) {
+; CHECK-LABEL: @appendIndexReverse(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 2, i32 0, i64 2
+; CHECK-NEXT:    ret ptr [[TMP1]]
+;
+  %1 = getelementptr inbounds i64, ptr %p, i64 1
+  %2 = getelementptr inbounds %struct.B, ptr %1, i64 %i, i32 1
+  ret ptr %2
+}
+
+; Offset of either GEP is not divisible by the other's size, converted to i8*
 ; and merged.
 ; Here i24 is 8-bit aligned.
-; result = (ptr) p + 7
+; result = (i8*) p + 7
 define ptr @notDivisible(ptr %p) {
 ; CHECK-LABEL: @notDivisible(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7
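A quick sanity check of the merged indices in @appendIndexReverse, assuming %struct.A (defined earlier in this file, outside the diff) begins with an i8 array so that its member0 sits at offset 0: within %struct.B = { i8, [3 x i16], %struct.A, float }, member1 lives at byte offset 2 and member2 at offset 8, so the input chain advances 8 + (%i * sizeof(%struct.B)) + 2 bytes, and

  8 + 2 = 10 = 8 (member2) + 0 (member0) + 2

which is exactly the merged index list (i64 %i, i32 2, i32 0, i64 2) in the CHECK line.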
@@ -144,8 +157,8 @@
 ; or divisible by the other's size.
 define ptr @partialConstant2(ptr %p, i64 %a) {
 ; CHECK-LABEL: @partialConstant2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [4 x i64], ptr [[P:%.*]], i64 [[A:%.*]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1
 ; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -1723,10 +1723,10 @@
 define void @ashr_out_of_range_1(ptr %A) {
 ; CHECK-LABEL: @ashr_out_of_range_1(
 ; CHECK-NEXT:    [[L:%.*]] = load i177, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[G11:%.*]] = getelementptr i177, ptr [[A]], i64 -1
 ; CHECK-NEXT:    [[B24_LOBIT:%.*]] = ashr i177 [[L]], 175
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i177 [[B24_LOBIT]] to i64
-; CHECK-NEXT:    [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP1]]
+; CHECK-NEXT:    [[G111:%.*]] = getelementptr i177, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[G62:%.*]] = getelementptr i177, ptr [[G111]], i64 -1
 ; CHECK-NEXT:    store i177 0, ptr [[G62]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -38,8 +38,7 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
 ; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -50,8 +49,7 @@
 ; CHECK-NEXT:    [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7
 ; CHECK-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
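The vector-reverse-mask4 change above is a knock-on effect of deleting the old "wait until our source is folded" bail-out in InstructionCombining.cpp: two adjacent constant-offset GEPs now combine even when their base is itself a GEP chain. Schematically, from the CHECK lines above:

  %5 = getelementptr inbounds double, double* %2, i64 -4
  %6 = getelementptr inbounds double, double* %5, i64 -3
  ; folds to a single GEP, since (-4) + (-3) = -7
  %6 = getelementptr inbounds double, double* %2, i64 -7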
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -762,11 +762,9 @@
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>*
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
diff --git a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
--- a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
@@ -11,14 +11,14 @@
 ; CHECK-NEXT:  start:
 ; CHECK-NEXT:    [[DATA:%.*]] = alloca [2 x i8], align 2
 ; CHECK-NEXT:    store i16 [[TMP0:%.*]], ptr [[DATA]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
 ; CHECK-NEXT:    br label [[BB6_I_I:%.*]]
 ; CHECK:       bb6.i.i:
 ; CHECK-NEXT:    [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
 ; CHECK-NEXT:    [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]]
 ; CHECK-NEXT:    [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
 ; CHECK-NEXT:    [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
-; CHECK-NEXT:    [[_39_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[_40_I_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [0 x i8], ptr [[DATA]], i64 0, i64 [[_40_I_I]]
+; CHECK-NEXT:    [[_39_I_I:%.*]] = getelementptr i8, ptr [[TMP1]], i64 1
 ; CHECK-NEXT:    [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1
 ; CHECK-NEXT:    [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1
 ; CHECK-NEXT:    store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1
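Since all of the touched tests carry autogenerated assertions, their CHECK lines can be regenerated after a rebase with the usual update script; the invocation below is illustrative (the build directory path is an assumption):

  python llvm/utils/update_test_checks.py --opt-binary build/bin/opt \
      llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll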