Index: llvm/include/llvm/Analysis/BasicAliasAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -220,7 +220,6 @@ AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size, const AAMDNodes &V1AAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI); AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize, Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -376,6 +376,8 @@ const Value *VConst = V; return const_cast<Value *>(getUnderlyingObject(VConst, MaxLookup)); } + const Value *getUnderlyingObject2(const Value *V, + unsigned MaxLookup = 6); /// This method is similar to getUnderlyingObject except that it can /// look through phi and select instructions and return multiple objects. Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -91,9 +91,9 @@ /// cannot be involved in a cycle. const unsigned MaxNumPhiBBsValueReachabilityCheck = 20; -// The max limit of the search depth in DecomposeGEPExpression() and -// getUnderlyingObject(), both functions need to use the same search -// depth otherwise the algorithm in aliasGEP will assert. +// The max limit of the search depth in DecomposeGEPExpression() and, +// separately, the limit used for getUnderlyingObject(). We used to require +// these to be the same, but that is no longer required for correctness.
static const unsigned MaxLookupSearchDepth = 6; bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, @@ -412,10 +412,6 @@ /// specified amount, but which may have other unrepresented high bits. As /// such, the gep cannot necessarily be reconstructed from its decomposed form. /// -/// This function is capable of analyzing everything that getUnderlyingObject -/// can look through. To be able to do that getUnderlyingObject and -/// DecomposeGEPExpression must use the same search depth -/// (MaxLookupSearchDepth). BasicAAResult::DecomposedGEP BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) { @@ -1045,7 +1041,7 @@ AliasResult BasicAAResult::aliasGEP( const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) { + AAQueryInfo &AAQI) { DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT); DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT); @@ -1055,10 +1051,6 @@ !DecompGEP2.HasCompileTimeConstantScale) return MayAlias; - assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 && - "DecomposeGEPExpression returned a result different from " - "getUnderlyingObject"); - // Subtract the GEP2 pointer from the GEP1 pointer to find out their // symbolic difference. DecompGEP1.Offset -= DecompGEP2.Offset; @@ -1088,13 +1080,13 @@ // when performing the alias check on the underlying objects. if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty()) return getBestAAResults().alias( - MemoryLocation(UnderlyingV1, V1Size, V1AAInfo), - MemoryLocation(UnderlyingV2, V2Size, V2AAInfo), AAQI); + MemoryLocation(DecompGEP1.Base, V1Size, V1AAInfo), + MemoryLocation(DecompGEP2.Base, V2Size, V2AAInfo), AAQI); // Do the base pointers alias? 
AliasResult BaseAlias = getBestAAResults().alias( - MemoryLocation::getBeforeOrAfter(UnderlyingV1), - MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI); + MemoryLocation::getBeforeOrAfter(DecompGEP1.Base), + MemoryLocation::getBeforeOrAfter(DecompGEP2.Base), AAQI); // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. @@ -1485,8 +1477,8 @@ return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. - const Value *O1 = getUnderlyingObject(V1, MaxLookupSearchDepth); - const Value *O2 = getUnderlyingObject(V2, MaxLookupSearchDepth); + const Value *O1 = getUnderlyingObject2(V1, MaxLookupSearchDepth); + const Value *O2 = getUnderlyingObject2(V2, MaxLookupSearchDepth); // Null values in the default address space don't point to any object, so they // don't alias any other pointer. @@ -1617,12 +1609,12 @@ AAQueryInfo &AAQI, const Value *O1, const Value *O2) { if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { AliasResult Result = - aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2, AAQI); + aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, AAQI); if (Result != MayAlias) return Result; } else if (const GEPOperator *GV2 = dyn_cast<GEPOperator>(V2)) { AliasResult Result = - aliasGEP(GV2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, O2, O1, AAQI); + aliasGEP(GV2, V2Size, V2AAInfo, V1, V1Size, V1AAInfo, AAQI); if (Result != MayAlias) return Result; } Index: llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/lib/Analysis/ValueTracking.cpp +++ llvm/lib/Analysis/ValueTracking.cpp @@ -4162,10 +4162,12 @@ return true; } -const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) { +/// Look back through the graph at most MaxLookup steps, stopping at the first +/// phi or select encountered. Updates MaxLookup with the remaining budget.
+const Value *getUnderlyingObjectStep(const Value *V, unsigned &MaxLookup) { if (!V->getType()->isPointerTy()) return V; - for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { + for ( ; MaxLookup != 0; MaxLookup--) { if (auto *GEP = dyn_cast<GEPOperator>(V)) { V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast || @@ -4207,6 +4209,52 @@ return V; } +const Value *llvm::getUnderlyingObject(const Value *V, + unsigned MaxLookup) { + return getUnderlyingObjectStep(V, MaxLookup); +} + +const Value *llvm::getUnderlyingObject2(const Value *V, + unsigned MaxLookup) { + auto *FirstStep = getUnderlyingObjectStep(V, MaxLookup); + if (MaxLookup == 0) + return FirstStep; + + const Value *Object = nullptr; + SmallPtrSet<const Value *, 4> Visited; + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(FirstStep); + do { + const Value *P = Worklist.pop_back_val(); + P = getUnderlyingObjectStep(P, MaxLookup); + if (MaxLookup == 0) + // Exhausted budget + return FirstStep; + + if (!Visited.insert(P).second) + continue; + + if (auto *SI = dyn_cast<SelectInst>(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + MaxLookup--; + continue; + } + + if (auto *PN = dyn_cast<PHINode>(P)) { + append_range(Worklist, PN->incoming_values()); + MaxLookup--; + continue; + } + + if (Object) + // Found at least two objects, fall back to conservative result + return FirstStep; + Object = P; + } while (!Worklist.empty()); + return Object ? 
Object : FirstStep; +} + void llvm::getUnderlyingObjects(const Value *V, SmallVectorImpl<const Value *> &Objects, LoopInfo *LI, unsigned MaxLookup) { Index: llvm/test/Analysis/BasicAA/recphi.ll =================================================================== --- llvm/test/Analysis/BasicAA/recphi.ll +++ llvm/test/Analysis/BasicAA/recphi.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s --check-prefixes=CHECK,NO-PHI-VALUES -; RUN: opt < %s -phi-values -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s --check-prefixes=CHECK,PHI-VALUES +; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -phi-values -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; CHECK-LABEL: Function: simple: 5 pointers, 0 call sites ; CHECK: NoAlias: float* %src1, float* %src2 @@ -39,11 +39,11 @@ ; CHECK: MustAlias: [2 x i32]* %tab, i32* %arrayidx1 ; CHECK: MustAlias: i32* %arrayidx1, i8* %0 ; CHECK: NoAlias: i32* %arrayidx, i32* %arrayidx1 -; CHECK: MayAlias: [2 x i32]* %tab, i32* %p.addr.05.i +; CHECK: PartialAlias: [2 x i32]* %tab, i32* %p.addr.05.i ; CHECK: MayAlias: i32* %p.addr.05.i, i8* %0 ; CHECK: MayAlias: i32* %arrayidx, i32* %p.addr.05.i ; CHECK: MayAlias: i32* %arrayidx1, i32* %p.addr.05.i -; CHECK: MayAlias: [2 x i32]* %tab, i32* %incdec.ptr.i +; CHECK: PartialAlias: [2 x i32]* %tab, i32* %incdec.ptr.i ; CHECK: NoAlias: i32* %incdec.ptr.i, i8* %0 ; CHECK: MayAlias: i32* %arrayidx, i32* %incdec.ptr.i ; CHECK: NoAlias: i32* %arrayidx1, i32* %incdec.ptr.i @@ -92,11 +92,11 @@ ; CHECK: PartialAlias: [10 x i32]* %tab, i32* %arrayidx1 ; CHECK: NoAlias: i32* %arrayidx1, i8* %0 ; CHECK: NoAlias: i32* %arrayidx, i32* %arrayidx1 -; CHECK: MayAlias: [10 x i32]* %tab, i32* %p.addr.05.i +; CHECK: PartialAlias: [10 x i32]* %tab, i32* %p.addr.05.i ; CHECK: MayAlias: i32* %p.addr.05.i, i8* %0 ; CHECK: MayAlias: 
i32* %arrayidx, i32* %p.addr.05.i ; CHECK: MayAlias: i32* %arrayidx1, i32* %p.addr.05.i -; CHECK: MayAlias: [10 x i32]* %tab, i32* %incdec.ptr.i +; CHECK: PartialAlias: [10 x i32]* %tab, i32* %incdec.ptr.i ; CHECK: MayAlias: i32* %incdec.ptr.i, i8* %0 ; CHECK: MayAlias: i32* %arrayidx, i32* %incdec.ptr.i ; CHECK: MayAlias: i32* %arrayidx1, i32* %incdec.ptr.i @@ -143,10 +143,10 @@ ; CHECK: NoAlias: i16* %_tmp1, i16** %argv.6.par ; CHECK: PartialAlias: [3 x i16]* %int_arr.10, i16* %_tmp1 ; CHECK: NoAlias: i16* %ls1.9.0, i16** %argv.6.par -; CHECK: MayAlias: [3 x i16]* %int_arr.10, i16* %ls1.9.0 +; CHECK: PartialAlias: [3 x i16]* %int_arr.10, i16* %ls1.9.0 ; CHECK: MayAlias: i16* %_tmp1, i16* %ls1.9.0 ; CHECK: NoAlias: i16* %_tmp7, i16** %argv.6.par -; CHECK: MayAlias: [3 x i16]* %int_arr.10, i16* %_tmp7 +; CHECK: PartialAlias: [3 x i16]* %int_arr.10, i16* %_tmp7 ; CHECK: MayAlias: i16* %_tmp1, i16* %_tmp7 ; CHECK: NoAlias: i16* %_tmp7, i16* %ls1.9.0 ; CHECK: NoAlias: i16* %_tmp11, i16** %argv.6.par @@ -237,8 +237,7 @@ ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p.outer ; CHECK: NoAlias: i8* %a, i8* %p.outer.next -; NO-PHI-VALUES: MayAlias: i8* %a, i8* %p.inner -; PHI-VALUES: NoAlias: i8* %a, i8* %p.inner +; CHECK: NoAlias: i8* %a, i8* %p.inner ; CHECK: NoAlias: i8* %a, i8* %p.inner.next define void @nested_loop(i1 %c, i1 %c2, i8* noalias %p.base) { entry: @@ -267,7 +266,7 @@ ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p.outer ; CHECK: NoAlias: i8* %a, i8* %p.outer.next -; CHECK: MayAlias: i8* %a, i8* %p.inner +; CHECK: NoAlias: i8* %a, i8* %p.inner ; CHECK: NoAlias: i8* %a, i8* %p.inner.next ; TODO: (a, p.inner) could be NoAlias define void @nested_loop2(i1 %c, i1 %c2, i8* noalias %p.base) { @@ -296,8 +295,7 @@ ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p.outer ; CHECK: NoAlias: i8* %a, i8* %p.outer.next -; NO-PHI-VALUES: NoAlias: i8* %a, i8* %p.inner -; PHI-VALUES: MayAlias: i8* %a, i8* 
%p.inner +; CHECK: NoAlias: i8* %a, i8* %p.inner ; CHECK: NoAlias: i8* %a, i8* %p.inner.next define void @nested_loop3(i1 %c, i1 %c2, i8* noalias %p.base) { entry: @@ -325,9 +323,8 @@ ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p1 ; CHECK: NoAlias: i8* %a, i8* %p1.next -; CHECK: MayAlias: i8* %a, i8* %p2 +; CHECK: NoAlias: i8* %a, i8* %p2 ; CHECK: NoAlias: i8* %a, i8* %p2.next -; TODO: %p2 does not alias %a define void @sibling_loop(i1 %c, i1 %c2, i8* noalias %p.base) { entry: %a = alloca i8 @@ -351,8 +348,7 @@ ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p1 ; CHECK: NoAlias: i8* %a, i8* %p1.next -; NO-PHI-VALUES: NoAlias: i8* %a, i8* %p2 -; PHI-VALUES: MayAlias: i8* %a, i8* %p2 +; CHECK: NoAlias: i8* %a, i8* %p2 ; CHECK: NoAlias: i8* %a, i8* %p2.next define void @sibling_loop2(i1 %c, i1 %c2, i8* noalias %p.base) { entry: Index: llvm/test/CodeGen/Hexagon/swp-epilog-phi13.ll =================================================================== --- llvm/test/CodeGen/Hexagon/swp-epilog-phi13.ll +++ llvm/test/CodeGen/Hexagon/swp-epilog-phi13.ll @@ -1,16 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 -pipeliner-experimental-cg=true < %s | FileCheck %s ; Test epilogue generation when reading loop-carried dependency in stage 1 from ; stage 0. 
Make sure the illegal phi the expender creates gets cleaned up ; correctly during peeling -; CHECK: loop0 -; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-9]+}},#8) -; CHECK: memw([[REG0]]+#0) -; CHECK: endloop0 - ; Function Attrs: nounwind define i32* @f0(i16* nocapture readonly %a0, i32 %a1, i32 %a2, i32 %a3, i16* %b) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: if (p0) jumpr r31 +; CHECK-NEXT: } +; CHECK-NEXT: .LBB0_1: // %b1 +; CHECK-NEXT: { +; CHECK-NEXT: r3 = asl(r2,#1) +; CHECK-NEXT: r7 = add(r1,#-2) +; CHECK-NEXT: r2 = #0 +; CHECK-NEXT: r5 = memh(r4+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = r5 +; CHECK-NEXT: r0 = add(r2,#8) +; CHECK-NEXT: p0 = cmp.gtu(r1,#1) +; CHECK-NEXT: r8 = add(r4,r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = mpy(r5.l,r5.l) +; CHECK-NEXT: r6 += mpy(r0.l,r0.l):sat +; CHECK-NEXT: memw(r2+#0) = r6.new +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB0_5 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %b2 +; CHECK-NEXT: { +; CHECK-NEXT: r4 = mpy(r5.l,r5.l) +; CHECK-NEXT: p0 = cmp.gtu(r1,#2) +; CHECK-NEXT: r1 = add(r2,#8) +; CHECK-NEXT: r9 = memh(r8+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = r9 +; CHECK-NEXT: r8 = add(r8,r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 += mpy(r0.l,r0.l):sat +; CHECK-NEXT: memw(r0+#0) = r5.new +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: loop0(.LBB0_3,r7) +; CHECK-NEXT: r5 = mpy(r9.l,r9.l) +; CHECK-NEXT: r7:6 = combine(#0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB0_4 +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .Ltmp0: // Block address taken +; CHECK-NEXT: .LBB0_3: // %b2 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r7 = r2 +; CHECK-NEXT: r2 = r0 +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: r9 = memh(r8+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = mpy(r4.l,r4.l) +; CHECK-NEXT: r4 = mpy(r5.l,r5.l) 
+; CHECK-NEXT: r6 = r7 +; CHECK-NEXT: memw(r6+#8) = r1.new +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = mpy(r9.l,r9.l) +; CHECK-NEXT: r9 += mpy(r0.l,r0.l):sat +; CHECK-NEXT: r1 = add(r2,#8) +; CHECK-NEXT: r8 = add(r8,r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: nop +; CHECK-NEXT: memw(r0+#0) = r9 +; CHECK-NEXT: } :endloop0 +; CHECK-NEXT: .LBB0_4: // %b2 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = mpy(r4.l,r4.l) +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: memw(r7+#8) = r1.new +; CHECK-NEXT: } +; CHECK-NEXT: .LBB0_5: // %b2 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = mpy(r5.l,r5.l) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = mpy(r1.l,r1.l) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: memw(r2+#8) = r1.new +; CHECK-NEXT: } b0: br i1 undef, label %b1, label %b3 Index: llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -876,62 +876,31 @@ define hidden void @mult_ptr_iv(i8* noalias nocapture readonly %x, i8* noalias nocapture %z) { ; CHECK-LABEL: @mult_ptr_iv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[Z:%.*]], i32 3000 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[X:%.*]], i32 3000 -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[SCEVGEP1]], [[Z]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[SCEVGEP]], [[X]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[X]], i32 3000 -; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, i8* [[Z]], i32 3000 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi i8* 
[ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i8* [ [[Z:%.*]], [[ENTRY]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI5]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !26 +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> , <4 x i8> undef) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !26 -; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !26 +; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> , <4 x i8> undef) ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], -; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]] -; CHECK-NEXT: [[TMP6:%.*]] = mul 
<4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]] +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER6]] +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> ), !alias.scope !29, !noalias !26 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> ) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> ), !alias.scope !29, !noalias !26 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> ), !alias.scope !29, !noalias !26 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> ) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i32 12 -; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, i8* [[POINTER_PHI5]], i32 12 -; CHECK-NEXT: br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP31:!llvm.loop !.*]] -; CHECK: for.body: -; CHECK-NEXT: [[X_ADDR_050:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[Z_ADDR_049:%.*]] = phi i8* [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ] -; CHECK-NEXT: [[I_048:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = load 
i8, i8* [[X_ADDR_050]], align 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 -; CHECK-NEXT: [[INCDEC_PTR2]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[INCDEC_PTR1]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP10]], 10 -; CHECK-NEXT: [[MUL1:%.*]] = mul i8 [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[MUL2:%.*]] = mul i8 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 1 -; CHECK-NEXT: store i8 [[MUL]], i8* [[Z_ADDR_049]], align 1 -; CHECK-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 2 -; CHECK-NEXT: store i8 [[MUL1]], i8* [[INCDEC_PTR32]], align 1 -; CHECK-NEXT: [[INCDEC_PTR34]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 3 -; CHECK-NEXT: store i8 [[MUL2]], i8* [[INCDEC_PTR33]], align 1 -; CHECK-NEXT: [[INC]] = add nuw i32 [[I_048]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], [[LOOP32:!llvm.loop !.*]] +; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i32 12 +; CHECK-NEXT: br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] ; CHECK: end: ; CHECK-NEXT: ret void ;