Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1677,7 +1677,26 @@ // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in // the address space. - if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ + if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) { + bool SrcInvariant = PSE.getSE()->isLoopInvariant(Src, InnermostLoop); + bool SinkInvariant = PSE.getSE()->isLoopInvariant(Sink, InnermostLoop); + + assert(!(StrideAPtr && SrcInvariant) && "Cannot be strided and invariant"); + assert(!(StrideBPtr && SinkInvariant) && "Cannot be strided and invariant"); + + bool SrcAffine = StrideAPtr; + if (!SrcAffine && !SrcInvariant && isa<SCEVAddRecExpr>(Src) && + cast<SCEVAddRecExpr>(Src)->isAffine()) + SrcAffine = true; + + bool SinkAffine = StrideBPtr; + if (!SinkAffine && !SinkInvariant && isa<SCEVAddRecExpr>(Sink) && + cast<SCEVAddRecExpr>(Sink)->isAffine()) + SinkAffine = true; + + if (APtr != BPtr && (SrcAffine || SinkAffine)) + FoundNonConstantDistanceDependence = true; + LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return Dependence::Unknown; } Index: llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll =================================================================== --- llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll +++ llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll @@ -126,12 +126,14 @@ define i32 @load_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'load_with_pointer_phi_outside_loop' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %ptr, align 8 -> -; CHECK-NEXT: store double %mul16, double* %arrayidx, align 8 +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ({{.*}}): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv +; CHECK-NEXT: Against group ({{.*}}): +; CHECK-NEXT: %ptr = phi double* [ %A, %if.then ], [ %ptr.select, %if.else ] ; entry: br i1 %c.0, label %if.then, label %if.else @@ -164,12 +166,14 @@ define i32 @store_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'store_with_pointer_phi_outside_loop' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> -; CHECK-NEXT: store double %mul16, double* %ptr, align 8 +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ({{.*}}): +; CHECK-NEXT: %ptr = phi double* [ %A, %if.then ], [ %ptr.select, %if.else ] +; CHECK-NEXT: Against group ({{.*}}): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv ; entry: br i1 %c.0, label %if.then, label %if.else @@ -202,43 +206,8 @@ define i32 @store_with_pointer_phi_incoming_phi(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> -; CHECK-NEXT: store double %mul16, double* %ptr.2, align 8 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Check 0: -; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]): -; CHECK-NEXT: double* %C -; CHECK-NEXT: Against group ([[GROUP_B:.+]]): -; CHECK-NEXT: double* %B -; CHECK-NEXT: Check 1: -; CHECK-NEXT: Comparing group ([[GROUP_C]]): -; CHECK-NEXT: double* %C -; CHECK-NEXT: Against group ([[GROUP_A:.+]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv -; CHECK-NEXT: double* %A -; CHECK-NEXT: Check 2: -; CHECK-NEXT: Comparing group ([[GROUP_B]]): -; CHECK-NEXT: double* %B -; CHECK-NEXT: Against group ([[GROUP_A]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv -; CHECK-NEXT: double* %A -; CHECK-NEXT: Grouped accesses: -; CHECK-NEXT: Group [[GROUP_C]]: -; CHECK-NEXT: (Low: %C High: (8 + %C)) -; CHECK-NEXT: Member: %C -; CHECK-NEXT: Group [[GROUP_B]]: -; CHECK-NEXT: (Low: %B High: (8 + %B)) -; CHECK-NEXT: Member: %B -; CHECK-NEXT: Group [[GROUP_A]]: -; CHECK-NEXT: (Low: %A High: (256000 + %A)) -; CHECK-NEXT: Member: {%A,+,8}<%loop.header> -; CHECK-NEXT: Member: %A -; CHECK-EMPTY +; CHECK-NEXT: Memory dependences are safe with run-time checks +; entry: br label %loop.header @@ -279,43 +248,9 @@ define i32 @store_with_pointer_phi_incoming_phi_irreducible_cycle(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi_irreducible_cycle' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> -; CHECK-NEXT: store double %mul16, double* %ptr.3, align 8 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Check 0: -; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]): -; CHECK-NEXT: double* %C -; CHECK-NEXT: Against group ([[GROUP_B:.+]]): -; CHECK-NEXT: double* %B -; CHECK-NEXT: Check 1: -; CHECK-NEXT: Comparing group ([[GROUP_C]]): -; CHECK-NEXT: double* %C -; CHECK-NEXT: Against group ([[GROUP_A:.+]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv -; CHECK-NEXT: double* %A -; CHECK-NEXT: Check 2: -; CHECK-NEXT: Comparing group ([[GROUP_B]]): -; CHECK-NEXT: double* %B -; CHECK-NEXT: Against group ([[GROUP_A]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv -; CHECK-NEXT: double* %A -; CHECK-NEXT: Grouped accesses: -; CHECK-NEXT: Group [[GROUP_C]] -; CHECK-NEXT: (Low: %C High: (8 + %C)) -; CHECK-NEXT: Member: %C -; CHECK-NEXT: Group [[GROUP_B]] -; CHECK-NEXT: (Low: %B High: (8 + %B)) -; CHECK-NEXT: Member: %B -; CHECK-NEXT: Group [[GROUP_A]] -; CHECK-NEXT: (Low: %A High: (256000 + %A)) -; CHECK-NEXT: Member: {%A,+,8}<%loop.header> -; CHECK-NEXT: Member: %A -; CHECK-EMPTY +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; entry: br label %loop.header @@ -351,12 +286,10 @@ define i32 @store_with_pointer_phi_outside_loop_select(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'store_with_pointer_phi_outside_loop_select' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> -; CHECK-NEXT: store double %mul16, double* %ptr, align 8 +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: ; entry: br i1 %c.0, label %if.then, label %if.else Index: llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll =================================================================== --- llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll +++ llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll @@ -9,85 +9,30 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A5:%.*]] = bitcast i32* [[A:%.*]] to i8* -; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]] -; CHECK: for.body.lver.check: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP7]], [[TMP17]] -; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] -; CHECK: for.body.ph.lver.orig: -; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] -; CHECK: for.body.lver.orig: -; CHECK-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: 
[[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 -; CHECK-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXA_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXB_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]] -; CHECK-NEXT: [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 -; CHECK-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LVER_ORIG]] -; CHECK-NEXT: store i32 [[MULA_LVER_ORIG]], i32* [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADD_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXD_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADE_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXE_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]] -; CHECK-NEXT: [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: store i32 [[MULC_LVER_ORIG]], i32* [[ARRAYIDXC_LVER_ORIG]], align 4 -; CHECK-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] -; CHECK: 
for.body.ph.ldist1: -; CHECK-NEXT: br label [[FOR_BODY_LDIST1:%.*]] -; CHECK: for.body.ldist1: -; CHECK-NEXT: [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2 -; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0 -; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4 -; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]] -; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1 -; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]] -; CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !3 -; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]] -; CHECK: for.body.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 ; CHECK-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64 +; 
CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADA:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4 +; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADB:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4 +; CHECK-NEXT: [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]] ; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1 ; CHECK-NEXT: [[INC1]] = add i32 [[IND1]], 1 -; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD]] +; CHECK-NEXT: store i32 [[MULA]], i32* [[ARRAYIDXA_PLUS_4]], align 4 +; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4 -; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4 ; CHECK-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]] -; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end.loopexit3: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -144,84 +89,30 @@ ; CHECK-NEXT: [[A_INTPTR:%.*]] = ptrtoint i32* 
[[A_BASE]] to i64 ; CHECK-NEXT: call void @use64(i64 [[A_INTPTR]]) ; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[A_BASE]], i32 42 -; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]] -; CHECK: for.body.lver.check: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP7]], [[TMP17]] -; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] -; CHECK: for.body.ph.lver.orig: -; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] -; CHECK: for.body.lver.orig: -; CHECK-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 -; CHECK-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXA_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* 
[[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXB_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]] -; CHECK-NEXT: [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 -; CHECK-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LVER_ORIG]] -; CHECK-NEXT: store i32 [[MULA_LVER_ORIG]], i32* [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADD_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXD_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADE_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXE_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]] -; CHECK-NEXT: [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: store i32 [[MULC_LVER_ORIG]], i32* [[ARRAYIDXC_LVER_ORIG]], align 4 -; CHECK-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] -; CHECK: for.body.ph.ldist1: -; CHECK-NEXT: br label [[FOR_BODY_LDIST1:%.*]] -; CHECK: for.body.ldist1: -; CHECK-NEXT: [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2 -; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], 
i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !5 -; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4 -; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]] -; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1 -; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]] -; CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !8 -; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]] -; CHECK: for.body.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 ; CHECK-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64 +; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADA:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4 +; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADB:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4 +; CHECK-NEXT: [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]] ; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1 ; CHECK-NEXT: [[INC1]] = add i32 [[IND1]], 1 -; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 
[[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD]] +; CHECK-NEXT: store i32 [[MULA]], i32* [[ARRAYIDXA_PLUS_4]], align 4 +; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4 -; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4 ; CHECK-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]] -; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end.loopexit2: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -63,34 +63,34 @@ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP0]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], 
label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[ADD_US]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1> -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP23]], i32 0 -; CHECK-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1 -; CHECK-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2 -; CHECK-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3 -; CHECK-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[ADD_US]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP19]], i32 0 +; CHECK-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP19]], i32 1 +; CHECK-NEXT: store i32 [[TMP21]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP19]], i32 2 +; CHECK-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP19]], i32 3 +; CHECK-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] @@ -148,37 +148,91 @@ define void @no-par-mem-metadata(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-LABEL: @no-par-mem-metadata( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[A1:%.*]] = ptrtoint i32* [[A:%.*]] to i64 ; CHECK-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[M:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP27]], label 
[[FOR_BODY3_LR_PH_US_PREHEADER:%.*]], label [[FOR_END15:%.*]] ; CHECK: for.body3.lr.ph.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[K:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[K]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV33:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX9_US]], align 4 -; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX9_US]], align 4 +; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP5]], 3 ; CHECK-NEXT: store i32 [[ADD10_US]], i32* [[ARRAYIDX9_US]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 ; CHECK-NEXT: [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]] ; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop [[LOOP2]] ; CHECK: for.body3.us: -; CHECK-NEXT: [[INDVARS_IV29:%.*]] = phi i64 [ 0, [[FOR_BODY3_LR_PH_US]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 -; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP1]] +; CHECK-NEXT: [[INDVARS_IV29:%.*]] = phi i64 [ [[BC_RESUME_VAL:%.*]], [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 +; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP6]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD4_US]] to i64 -; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_US]], 
align 4 -; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM_US]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP7]], 1 ; CHECK-NEXT: store i32 [[ADD5_US]], i32* [[ARRAYIDX7_US:%.*]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 ; CHECK-NEXT: [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP1:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: for.body3.lr.ph.us: ; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 -; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP3]], [[K:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], [[INDVARS_IV33]] +; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = shl nsw i64 [[TMP10]], 2 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[A1]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]] +; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 +; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP15]], [[K]] ; CHECK-NEXT: [[ARRAYIDX7_US]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV33]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp slt i32 
[[TMP16]], [[TMP14]] +; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 [[A1]], [[TMP12]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP18]], 16 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[ADD_US]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[TMP21]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP26]], i32 0 +; CHECK-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i32 1 +; CHECK-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX7_US]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP26]], i32 2 +; CHECK-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX7_US]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP26]], i32 3 +; CHECK-NEXT: store i32 [[TMP30]], i32* [[ARRAYIDX7_US]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY3_US]] ; CHECK: for.end15.loopexit: ; CHECK-NEXT: br label [[FOR_END15]] Index: llvm/test/Transforms/LoopVectorize/global_alias.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/global_alias.ll +++ llvm/test/Transforms/LoopVectorize/global_alias.ll @@ -777,7 +777,8 @@ ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @mayAlias01( -; CHECK-NOT: add nsw <4 x i32> +; CHECK: vector.memcheck +; CHECK: add nsw <4 x i32> ; CHECK: ret define i32 @mayAlias01(i32 %a) nounwind { @@ -827,7 +828,8 @@ ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @mayAlias02( -; CHECK-NOT: add nsw <4 x i32> +; CHECK: vector.memcheck +; CHECK: add nsw <4 x i32> ; CHECK: ret define i32 @mayAlias02(i32 %a) nounwind { Index: llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll +++ llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll @@ -259,38 +259,6 @@ ret void } -; // g) Dependence::Unknown -; // Different stride lengths -; void test_unknown_dep(int n, int* A) { -; for(int i=0; i < n; ++i) { -; A[(i+1)*4] = 10; -; A[i] = 100; -; } -; } - -; CHECK: remark: source.c:83:7: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK: Unknown data dependence. 
Memory location is the same as accessed at source.c:82:7 - -define void @test_unknown_dep(i64 %n, i32* nocapture %A) !dbg !214 { -entry: - %cmp8 = icmp sgt i64 %n, 0 - br i1 %cmp8, label %for.body, label %for.cond.cleanup - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %0 = shl nsw i64 %indvars.iv.next, 2 - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !229 - store i32 10, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !231 - store i32 100, i32* %arrayidx2, align 4, !dbg !231 - %exitcond.not = icmp eq i64 %indvars.iv.next, %n - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body - - for.cond.cleanup: ; preds = %for.body, %entry - ret void -} - ; YAML: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize ; YAML-NEXT: Name: CantIdentifyArrayBounds @@ -361,33 +329,6 @@ ; YAML-NEXT: - Location: 'source.c:74:21' ; YAML-NEXT: DebugLoc: { File: source.c, Line: 74, Column: 21 } ; YAML-NEXT: ... -; YAML-NEXT: --- !Missed -; YAML-NEXT: Pass: loop-vectorize -; YAML-NEXT: Name: MissedDetails -; YAML-NEXT: Function: test_backwardVectorizableButPreventsForwarding -; YAML-NEXT: Args: -; YAML-NEXT: - String: loop not vectorized -; YAML-NEXT: ... -; YAML-NEXT: --- !Analysis -; YAML-NEXT: Pass: loop-vectorize -; YAML-NEXT: Name: UnsafeDep -; YAML-NEXT: DebugLoc: { File: source.c, Line: 83, Column: 7 } -; YAML-NEXT: Function: test_unknown_dep -; YAML-NEXT: Args: -; YAML-NEXT: - String: 'loop not vectorized: ' -; YAML-NEXT: - String: 'unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop' -; YAML-NEXT: - String: "\nUnknown data dependence." 
-; YAML-NEXT: - String: ' Memory location is the same as accessed at ' -; YAML-NEXT: - Location: 'source.c:82:7' -; YAML-NEXT: DebugLoc: { File: source.c, Line: 82, Column: 7 } -; YAML-NEXT: ... -; YAML-NEXT: --- !Missed -; YAML-NEXT: Pass: loop-vectorize -; YAML-NEXT: Name: MissedDetails -; YAML-NEXT: Function: test_unknown_dep -; YAML-NEXT: Args: -; YAML-NEXT: - String: loop not vectorized -; YAML-NEXT: ... !llvm.dbg.cu = !{!0} Index: llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll +++ llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll @@ -109,7 +109,8 @@ define i32 @load_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: @load_with_pointer_phi_outside_loop -; CHECK-NOT: vector.body +; CHECK: vector.body +; CHECK: memcheck ; entry: br i1 %c.0, label %if.then, label %if.else @@ -141,7 +142,8 @@ define i32 @store_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: @store_with_pointer_phi_outside_loop -; CHECK-NOT: vector.body +; CHECK: vector.body +; CHECK: memcheck ; entry: br i1 %c.0, label %if.then, label %if.else Index: llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -92,97 +92,393 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [225 x double]* [[A:%.*]] to <225 x double>* ; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>* -; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] -; CHECK: for.cond1.preheader.us: -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 
[[I]], 225 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV6]] +; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] +; CHECK: for.cond1.preheader.us.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[CONV6]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[I]], 225 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV6]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP26:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP5]] +; CHECK-NEXT: [[SCEVGEP24:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[CONV6]] +; CHECK-NEXT: [[SCEVGEP22:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SCEVGEP20:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 0 +; CHECK-NEXT: [[SCEVGEP18:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 0 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[SCEVGEP22]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult double* [[SCEVGEP20]], [[SCEVGEP18]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND028:%.*]] = icmp ult double* [[SCEVGEP]], [[SCEVGEP26]] +; CHECK-NEXT: [[BOUND129:%.*]] = icmp ult double* [[SCEVGEP24]], 
[[SCEVGEP18]] +; CHECK-NEXT: [[FOUND_CONFLICT30:%.*]] = and i1 [[BOUND028]], [[BOUND129]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT30]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY4_US_PREHEADER]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 8, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT35:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT36:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT35]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT37:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT38:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT37]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <2 x i32> [[STEP_ADD]], +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP10]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 0 +; 
CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP17]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP18]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP17]], i64 2 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP19]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x double>, <2 x double>* [[TMP20]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT36]] +; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[WIDE_LOAD34]], [[BROADCAST_SPLAT38]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP16]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast double* [[TMP23]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD39:%.*]] = load <2 x double>, <2 x double>* [[TMP24]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD40:%.*]] = load <2 x double>, <2 x double>* [[TMP26]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP27:%.*]] = fsub <2 x double> [[WIDE_LOAD39]], [[TMP21]] +; CHECK-NEXT: [[TMP28:%.*]] = fsub <2 x double> [[WIDE_LOAD40]], [[TMP22]] +; CHECK-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP23]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[TMP29]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP25]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[TMP30]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x 
i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]], label [[FOR_BODY4_US_PREHEADER]] +; CHECK: for.body4.us.preheader: +; CHECK-NEXT: [[K_013_US_PH:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[K_013_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] +; CHECK-NEXT: [[K_013_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY4_US]] ], [ [[K_013_US_PH]], [[FOR_BODY4_US_PREHEADER]] ] ; CHECK-NEXT: [[CONV_US:%.*]] = zext i32 [[K_013_US]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[K_013_US]], 225 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[CONV_US]] -; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, double* [[TMP5]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, double* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP32:%.*]] = icmp ult i32 [[K_013_US]], 225 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP32]]) +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[CONV_US]] +; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, double* [[TMP33]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, double* [[TMP7]], align 8 ; CHECK-NEXT: [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV_US]] -; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, double* 
[[TMP6]], align 8 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV_US]] +; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, double* [[TMP34]], align 8 ; CHECK-NEXT: [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]] -; CHECK-NEXT: store double [[SUB_US]], double* [[TMP6]], align 8 +; CHECK-NEXT: store double [[SUB_US]], double* [[TMP34]], align 8 ; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[K_013_US]], 1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i32 [[INC_US]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] +; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[CONV6]], 15 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[I]], 210 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP8]]) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP7]] +; CHECK-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[CONV6]], 15 +; CHECK-NEXT: [[TMP36:%.*]] = icmp ult i32 [[I]], 210 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP36]]) +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP35]] +; CHECK-NEXT: [[MIN_ITERS_CHECK_1:%.*]] = icmp ult i32 [[I]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_1]], label [[FOR_BODY4_US_PREHEADER_1:%.*]], label [[VECTOR_MEMCHECK_1:%.*]] +; CHECK: vector.memcheck.1: +; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[CONV6]], 16 +; CHECK-NEXT: [[SCEVGEP26_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = add nuw nsw i64 [[CONV6]], 15 +; CHECK-NEXT: [[SCEVGEP24_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP39]] +; CHECK-NEXT: 
[[TMP40:%.*]] = add nuw nsw i64 [[TMP3]], 16 +; CHECK-NEXT: [[SCEVGEP22_1:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP40]] +; CHECK-NEXT: [[SCEVGEP20_1:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 15 +; CHECK-NEXT: [[SCEVGEP18_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP40]] +; CHECK-NEXT: [[SCEVGEP_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 15 +; CHECK-NEXT: [[BOUND0_1:%.*]] = icmp ult double* [[SCEVGEP_1]], [[SCEVGEP22_1]] +; CHECK-NEXT: [[BOUND1_1:%.*]] = icmp ult double* [[SCEVGEP20_1]], [[SCEVGEP18_1]] +; CHECK-NEXT: [[FOUND_CONFLICT_1:%.*]] = and i1 [[BOUND0_1]], [[BOUND1_1]] +; CHECK-NEXT: [[BOUND028_1:%.*]] = icmp ult double* [[SCEVGEP_1]], [[SCEVGEP26_1]] +; CHECK-NEXT: [[BOUND129_1:%.*]] = icmp ult double* [[SCEVGEP24_1]], [[SCEVGEP18_1]] +; CHECK-NEXT: [[FOUND_CONFLICT30_1:%.*]] = and i1 [[BOUND028_1]], [[BOUND129_1]] +; CHECK-NEXT: [[CONFLICT_RDX_1:%.*]] = or i1 [[FOUND_CONFLICT_1]], [[FOUND_CONFLICT30_1]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX_1]], label [[FOR_BODY4_US_PREHEADER_1]], label [[VECTOR_PH_1:%.*]] +; CHECK: vector.ph.1: +; CHECK-NEXT: [[N_VEC_1:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP41:%.*]] = load double, double* [[TMP37]], align 8, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT35_1:%.*]] = insertelement <2 x double> poison, double [[TMP41]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT36_1:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT35_1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT37_1:%.*]] = insertelement <2 x double> poison, double [[TMP41]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT38_1:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT37_1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY_1:%.*]] +; CHECK: vector.body.1: +; CHECK-NEXT: [[INDEX_1:%.*]] = phi i32 [ 0, [[VECTOR_PH_1]] ], [ 
[[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] +; CHECK-NEXT: [[VEC_IND_1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH_1]] ], [ [[VEC_IND_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] +; CHECK-NEXT: [[STEP_ADD_1:%.*]] = add <2 x i32> [[VEC_IND_1]], +; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i32> [[VEC_IND_1]], +; CHECK-NEXT: [[TMP43:%.*]] = icmp ult <2 x i32> [[STEP_ADD_1]], +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP42]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP44]]) +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i1> [[TMP42]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i1> [[TMP43]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP46]]) +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i1> [[TMP43]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP47]]) +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <2 x i32> [[VEC_IND_1]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = zext i32 [[TMP48]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = add nuw nsw i64 [[TMP49]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = bitcast double* [[TMP51]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, <2 x double>* [[TMP52]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds double, double* [[TMP51]], i64 2 +; CHECK-NEXT: [[TMP54:%.*]] = bitcast double* [[TMP53]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34_1:%.*]] = load <2 x double>, <2 x double>* [[TMP54]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP55:%.*]] = fmul <2 x double> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT36_1]] +; CHECK-NEXT: [[TMP56:%.*]] = fmul <2 x double> [[WIDE_LOAD34_1]], [[BROADCAST_SPLAT38_1]] +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP50]] +; CHECK-NEXT: [[TMP58:%.*]] = bitcast double* [[TMP57]] to <2 x 
double>* +; CHECK-NEXT: [[WIDE_LOAD39_1:%.*]] = load <2 x double>, <2 x double>* [[TMP58]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds double, double* [[TMP57]], i64 2 +; CHECK-NEXT: [[TMP60:%.*]] = bitcast double* [[TMP59]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD40_1:%.*]] = load <2 x double>, <2 x double>* [[TMP60]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP61:%.*]] = fsub <2 x double> [[WIDE_LOAD39_1]], [[TMP55]] +; CHECK-NEXT: [[TMP62:%.*]] = fsub <2 x double> [[WIDE_LOAD40_1]], [[TMP56]] +; CHECK-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP57]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP61]], <2 x double>* [[TMP63]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP64:%.*]] = bitcast double* [[TMP59]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP62]], <2 x double>* [[TMP64]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT_1]] = add nuw i32 [[INDEX_1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT_1]] = add <2 x i32> [[VEC_IND_1]], +; CHECK-NEXT: [[TMP65:%.*]] = icmp eq i32 [[INDEX_NEXT_1]], [[N_VEC_1]] +; CHECK-NEXT: br i1 [[TMP65]], label [[MIDDLE_BLOCK_1:%.*]], label [[VECTOR_BODY_1]], !llvm.loop [[LOOP8]] +; CHECK: middle.block.1: +; CHECK-NEXT: [[CMP_N_1:%.*]] = icmp eq i32 [[N_VEC_1]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]], label [[FOR_BODY4_US_PREHEADER_1]] +; CHECK: for.body4.us.preheader.1: +; CHECK-NEXT: [[K_013_US_PH_1:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK_1]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[N_VEC_1]], [[MIDDLE_BLOCK_1]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]] ; CHECK: for.body4.us.1: -; CHECK-NEXT: [[K_013_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ] +; CHECK-NEXT: [[K_013_US_1:%.*]] = phi i32 [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ], [ [[K_013_US_PH_1]], 
[[FOR_BODY4_US_PREHEADER_1]] ] ; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[K_013_US_1]], 15 -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[NARROW]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[K_013_US_1]], 210 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP10]] -; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, double* [[TMP12]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_1:%.*]] = load double, double* [[TMP9]], align 8 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = icmp ult i32 [[K_013_US_1]], 210 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP67]]) +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP66]] +; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, double* [[TMP68]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US_1:%.*]] = load double, double* [[TMP37]], align 8 ; CHECK-NEXT: [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP10]] -; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, double* [[TMP13]], align 8 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP66]] +; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, double* [[TMP69]], align 8 ; CHECK-NEXT: [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]] -; CHECK-NEXT: store double [[SUB_US_1]], double* [[TMP13]], align 8 +; CHECK-NEXT: store double [[SUB_US_1]], double* [[TMP69]], align 8 ; CHECK-NEXT: [[INC_US_1]] = add nuw nsw i32 [[K_013_US_1]], 1 ; CHECK-NEXT: [[CMP2_US_1:%.*]] = icmp ult i32 [[INC_US_1]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_1]], label [[FOR_BODY4_US_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]] +; CHECK-NEXT: br i1 [[CMP2_US_1]], 
label [[FOR_BODY4_US_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]], !llvm.loop [[LOOP10]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1: -; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[CONV6]], 30 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i32 [[I]], 195 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP14]] +; CHECK-NEXT: [[TMP70:%.*]] = add nuw nsw i64 [[CONV6]], 30 +; CHECK-NEXT: [[TMP71:%.*]] = icmp ult i32 [[I]], 195 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP71]]) +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP70]] +; CHECK-NEXT: [[MIN_ITERS_CHECK_2:%.*]] = icmp ult i32 [[I]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_2]], label [[FOR_BODY4_US_PREHEADER_2:%.*]], label [[VECTOR_MEMCHECK_2:%.*]] +; CHECK: vector.memcheck.2: +; CHECK-NEXT: [[TMP73:%.*]] = add nuw nsw i64 [[CONV6]], 31 +; CHECK-NEXT: [[SCEVGEP26_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP73]] +; CHECK-NEXT: [[TMP74:%.*]] = add nuw nsw i64 [[CONV6]], 30 +; CHECK-NEXT: [[SCEVGEP24_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP74]] +; CHECK-NEXT: [[TMP75:%.*]] = add nuw nsw i64 [[TMP3]], 31 +; CHECK-NEXT: [[SCEVGEP22_2:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP75]] +; CHECK-NEXT: [[SCEVGEP20_2:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 30 +; CHECK-NEXT: [[SCEVGEP18_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP75]] +; CHECK-NEXT: [[SCEVGEP_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 30 +; CHECK-NEXT: [[BOUND0_2:%.*]] = icmp ult double* [[SCEVGEP_2]], [[SCEVGEP22_2]] +; CHECK-NEXT: [[BOUND1_2:%.*]] = icmp ult double* [[SCEVGEP20_2]], [[SCEVGEP18_2]] +; CHECK-NEXT: [[FOUND_CONFLICT_2:%.*]] = and i1 
[[BOUND0_2]], [[BOUND1_2]] +; CHECK-NEXT: [[BOUND028_2:%.*]] = icmp ult double* [[SCEVGEP_2]], [[SCEVGEP26_2]] +; CHECK-NEXT: [[BOUND129_2:%.*]] = icmp ult double* [[SCEVGEP24_2]], [[SCEVGEP18_2]] +; CHECK-NEXT: [[FOUND_CONFLICT30_2:%.*]] = and i1 [[BOUND028_2]], [[BOUND129_2]] +; CHECK-NEXT: [[CONFLICT_RDX_2:%.*]] = or i1 [[FOUND_CONFLICT_2]], [[FOUND_CONFLICT30_2]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX_2]], label [[FOR_BODY4_US_PREHEADER_2]], label [[VECTOR_PH_2:%.*]] +; CHECK: vector.ph.2: +; CHECK-NEXT: [[N_VEC_2:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP76:%.*]] = load double, double* [[TMP72]], align 8, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT35_2:%.*]] = insertelement <2 x double> poison, double [[TMP76]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT36_2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT35_2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT37_2:%.*]] = insertelement <2 x double> poison, double [[TMP76]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT38_2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT37_2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY_2:%.*]] +; CHECK: vector.body.2: +; CHECK-NEXT: [[INDEX_2:%.*]] = phi i32 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] +; CHECK-NEXT: [[VEC_IND_2:%.*]] = phi <2 x i32> [ , [[VECTOR_PH_2]] ], [ [[VEC_IND_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[VEC_IND_2]], +; CHECK-NEXT: [[TMP77:%.*]] = icmp ult <2 x i32> [[VEC_IND_2]], +; CHECK-NEXT: [[TMP78:%.*]] = icmp ult <2 x i32> [[STEP_ADD_2]], +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <2 x i1> [[TMP77]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP79]]) +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <2 x i1> [[TMP77]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP80]]) +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <2 x i1> [[TMP78]], i64 0 +; CHECK-NEXT: tail 
call void @llvm.assume(i1 [[TMP81]]) +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <2 x i1> [[TMP78]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP82]]) +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <2 x i32> [[VEC_IND_2]], i64 0 +; CHECK-NEXT: [[TMP84:%.*]] = zext i32 [[TMP83]] to i64 +; CHECK-NEXT: [[TMP85:%.*]] = add nuw nsw i64 [[TMP84]], 30 +; CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP85]] +; CHECK-NEXT: [[TMP87:%.*]] = bitcast double* [[TMP86]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, <2 x double>* [[TMP87]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds double, double* [[TMP86]], i64 2 +; CHECK-NEXT: [[TMP89:%.*]] = bitcast double* [[TMP88]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34_2:%.*]] = load <2 x double>, <2 x double>* [[TMP89]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP90:%.*]] = fmul <2 x double> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT36_2]] +; CHECK-NEXT: [[TMP91:%.*]] = fmul <2 x double> [[WIDE_LOAD34_2]], [[BROADCAST_SPLAT38_2]] +; CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP85]] +; CHECK-NEXT: [[TMP93:%.*]] = bitcast double* [[TMP92]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD39_2:%.*]] = load <2 x double>, <2 x double>* [[TMP93]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds double, double* [[TMP92]], i64 2 +; CHECK-NEXT: [[TMP95:%.*]] = bitcast double* [[TMP94]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD40_2:%.*]] = load <2 x double>, <2 x double>* [[TMP95]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP96:%.*]] = fsub <2 x double> [[WIDE_LOAD39_2]], [[TMP90]] +; CHECK-NEXT: [[TMP97:%.*]] = fsub <2 x double> [[WIDE_LOAD40_2]], [[TMP91]] +; CHECK-NEXT: [[TMP98:%.*]] = bitcast double* [[TMP92]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP96]], <2 x double>* 
[[TMP98]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP99:%.*]] = bitcast double* [[TMP94]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP97]], <2 x double>* [[TMP99]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT_2]] = add nuw i32 [[INDEX_2]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT_2]] = add <2 x i32> [[VEC_IND_2]], +; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[INDEX_NEXT_2]], [[N_VEC_2]] +; CHECK-NEXT: br i1 [[TMP100]], label [[MIDDLE_BLOCK_2:%.*]], label [[VECTOR_BODY_2]], !llvm.loop [[LOOP8]] +; CHECK: middle.block.2: +; CHECK-NEXT: [[CMP_N_2:%.*]] = icmp eq i32 [[N_VEC_2]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]], label [[FOR_BODY4_US_PREHEADER_2]] +; CHECK: for.body4.us.preheader.2: +; CHECK-NEXT: [[K_013_US_PH_2:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK_2]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[N_VEC_2]], [[MIDDLE_BLOCK_2]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]] ; CHECK: for.body4.us.2: -; CHECK-NEXT: [[K_013_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ] -; CHECK-NEXT: [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30 -; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[NARROW17]] to i64 -; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[K_013_US_2]], 195 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]]) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, double* [[TMP19]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_2:%.*]] = load double, double* [[TMP16]], align 8 +; CHECK-NEXT: [[K_013_US_2:%.*]] = phi i32 [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ], [ [[K_013_US_PH_2]], [[FOR_BODY4_US_PREHEADER_2]] ] +; CHECK-NEXT: [[NARROW41:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30 +; CHECK-NEXT: [[TMP101:%.*]] = zext i32 [[NARROW41]] to i64 +; CHECK-NEXT: [[TMP102:%.*]] = 
icmp ult i32 [[K_013_US_2]], 195 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP102]]) +; CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP101]] +; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, double* [[TMP103]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US_2:%.*]] = load double, double* [[TMP72]], align 8 ; CHECK-NEXT: [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], [[MATRIXEXT8_US_2]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, double* [[TMP20]], align 8 +; CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP101]] +; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, double* [[TMP104]], align 8 ; CHECK-NEXT: [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]] -; CHECK-NEXT: store double [[SUB_US_2]], double* [[TMP20]], align 8 +; CHECK-NEXT: store double [[SUB_US_2]], double* [[TMP104]], align 8 ; CHECK-NEXT: [[INC_US_2]] = add nuw nsw i32 [[K_013_US_2]], 1 ; CHECK-NEXT: [[CMP2_US_2:%.*]] = icmp ult i32 [[INC_US_2]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_2]], label [[FOR_BODY4_US_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]] +; CHECK-NEXT: br i1 [[CMP2_US_2]], label [[FOR_BODY4_US_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]], !llvm.loop [[LOOP10]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2: -; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[CONV6]], 45 -; CHECK-NEXT: [[TMP22:%.*]] = icmp ult i32 [[I]], 180 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP21]] +; CHECK-NEXT: [[TMP105:%.*]] = add nuw nsw i64 [[CONV6]], 45 +; CHECK-NEXT: [[TMP106:%.*]] = icmp ult i32 [[I]], 180 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP106]]) +; CHECK-NEXT: 
[[TMP107:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP105]] +; CHECK-NEXT: [[MIN_ITERS_CHECK_3:%.*]] = icmp ult i32 [[I]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_3]], label [[FOR_BODY4_US_PREHEADER_3:%.*]], label [[VECTOR_MEMCHECK_3:%.*]] +; CHECK: vector.memcheck.3: +; CHECK-NEXT: [[TMP108:%.*]] = add nuw nsw i64 [[CONV6]], 46 +; CHECK-NEXT: [[SCEVGEP26_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP108]] +; CHECK-NEXT: [[TMP109:%.*]] = add nuw nsw i64 [[CONV6]], 45 +; CHECK-NEXT: [[SCEVGEP24_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP109]] +; CHECK-NEXT: [[TMP110:%.*]] = add nuw nsw i64 [[TMP3]], 46 +; CHECK-NEXT: [[SCEVGEP22_3:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP110]] +; CHECK-NEXT: [[SCEVGEP20_3:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 45 +; CHECK-NEXT: [[SCEVGEP18_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP110]] +; CHECK-NEXT: [[SCEVGEP_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 45 +; CHECK-NEXT: [[BOUND0_3:%.*]] = icmp ult double* [[SCEVGEP_3]], [[SCEVGEP22_3]] +; CHECK-NEXT: [[BOUND1_3:%.*]] = icmp ult double* [[SCEVGEP20_3]], [[SCEVGEP18_3]] +; CHECK-NEXT: [[FOUND_CONFLICT_3:%.*]] = and i1 [[BOUND0_3]], [[BOUND1_3]] +; CHECK-NEXT: [[BOUND028_3:%.*]] = icmp ult double* [[SCEVGEP_3]], [[SCEVGEP26_3]] +; CHECK-NEXT: [[BOUND129_3:%.*]] = icmp ult double* [[SCEVGEP24_3]], [[SCEVGEP18_3]] +; CHECK-NEXT: [[FOUND_CONFLICT30_3:%.*]] = and i1 [[BOUND028_3]], [[BOUND129_3]] +; CHECK-NEXT: [[CONFLICT_RDX_3:%.*]] = or i1 [[FOUND_CONFLICT_3]], [[FOUND_CONFLICT30_3]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX_3]], label [[FOR_BODY4_US_PREHEADER_3]], label [[VECTOR_PH_3:%.*]] +; CHECK: vector.ph.3: +; CHECK-NEXT: [[N_VEC_3:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP111:%.*]] = load double, double* [[TMP107]], align 8, 
!alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT35_3:%.*]] = insertelement <2 x double> poison, double [[TMP111]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT36_3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT35_3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT37_3:%.*]] = insertelement <2 x double> poison, double [[TMP111]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT38_3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT37_3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY_3:%.*]] +; CHECK: vector.body.3: +; CHECK-NEXT: [[INDEX_3:%.*]] = phi i32 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ] +; CHECK-NEXT: [[VEC_IND_3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH_3]] ], [ [[VEC_IND_NEXT_3:%.*]], [[VECTOR_BODY_3]] ] +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[VEC_IND_3]], +; CHECK-NEXT: [[TMP112:%.*]] = icmp ult <2 x i32> [[VEC_IND_3]], +; CHECK-NEXT: [[TMP113:%.*]] = icmp ult <2 x i32> [[STEP_ADD_3]], +; CHECK-NEXT: [[TMP114:%.*]] = extractelement <2 x i1> [[TMP112]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP114]]) +; CHECK-NEXT: [[TMP115:%.*]] = extractelement <2 x i1> [[TMP112]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP115]]) +; CHECK-NEXT: [[TMP116:%.*]] = extractelement <2 x i1> [[TMP113]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP116]]) +; CHECK-NEXT: [[TMP117:%.*]] = extractelement <2 x i1> [[TMP113]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP117]]) +; CHECK-NEXT: [[TMP118:%.*]] = extractelement <2 x i32> [[VEC_IND_3]], i64 0 +; CHECK-NEXT: [[TMP119:%.*]] = zext i32 [[TMP118]] to i64 +; CHECK-NEXT: [[TMP120:%.*]] = add nuw nsw i64 [[TMP119]], 45 +; CHECK-NEXT: [[TMP121:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP120]] +; CHECK-NEXT: [[TMP122:%.*]] = bitcast double* [[TMP121]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load 
<2 x double>, <2 x double>* [[TMP122]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds double, double* [[TMP121]], i64 2 +; CHECK-NEXT: [[TMP124:%.*]] = bitcast double* [[TMP123]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34_3:%.*]] = load <2 x double>, <2 x double>* [[TMP124]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP125:%.*]] = fmul <2 x double> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT36_3]] +; CHECK-NEXT: [[TMP126:%.*]] = fmul <2 x double> [[WIDE_LOAD34_3]], [[BROADCAST_SPLAT38_3]] +; CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP120]] +; CHECK-NEXT: [[TMP128:%.*]] = bitcast double* [[TMP127]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD39_3:%.*]] = load <2 x double>, <2 x double>* [[TMP128]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP129:%.*]] = getelementptr inbounds double, double* [[TMP127]], i64 2 +; CHECK-NEXT: [[TMP130:%.*]] = bitcast double* [[TMP129]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD40_3:%.*]] = load <2 x double>, <2 x double>* [[TMP130]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP131:%.*]] = fsub <2 x double> [[WIDE_LOAD39_3]], [[TMP125]] +; CHECK-NEXT: [[TMP132:%.*]] = fsub <2 x double> [[WIDE_LOAD40_3]], [[TMP126]] +; CHECK-NEXT: [[TMP133:%.*]] = bitcast double* [[TMP127]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP131]], <2 x double>* [[TMP133]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP134:%.*]] = bitcast double* [[TMP129]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP132]], <2 x double>* [[TMP134]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT_3]] = add nuw i32 [[INDEX_3]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT_3]] = add <2 x i32> [[VEC_IND_3]], +; CHECK-NEXT: [[TMP135:%.*]] = icmp eq i32 [[INDEX_NEXT_3]], [[N_VEC_3]] +; CHECK-NEXT: br i1 [[TMP135]], label [[MIDDLE_BLOCK_3:%.*]], label [[VECTOR_BODY_3]], !llvm.loop [[LOOP8]] +; CHECK: middle.block.3: +; 
CHECK-NEXT: [[CMP_N_3:%.*]] = icmp eq i32 [[N_VEC_3]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_PREHEADER_3]] +; CHECK: for.body4.us.preheader.3: +; CHECK-NEXT: [[K_013_US_PH_3:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK_3]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[N_VEC_3]], [[MIDDLE_BLOCK_3]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]] ; CHECK: for.body4.us.3: -; CHECK-NEXT: [[K_013_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ] -; CHECK-NEXT: [[NARROW18:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45 -; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[NARROW18]] to i64 -; CHECK-NEXT: [[TMP25:%.*]] = icmp ult i32 [[K_013_US_3]], 180 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP25]]) -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP24]] -; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, double* [[TMP26]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_3:%.*]] = load double, double* [[TMP23]], align 8 +; CHECK-NEXT: [[K_013_US_3:%.*]] = phi i32 [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ], [ [[K_013_US_PH_3]], [[FOR_BODY4_US_PREHEADER_3]] ] +; CHECK-NEXT: [[NARROW42:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45 +; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[NARROW42]] to i64 +; CHECK-NEXT: [[TMP137:%.*]] = icmp ult i32 [[K_013_US_3]], 180 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP137]]) +; CHECK-NEXT: [[TMP138:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP136]] +; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, double* [[TMP138]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US_3:%.*]] = load double, double* [[TMP107]], align 8 ; CHECK-NEXT: [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP24]] -; CHECK-NEXT: 
[[MATRIXEXT11_US_3:%.*]] = load double, double* [[TMP27]], align 8 +; CHECK-NEXT: [[TMP139:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP136]] +; CHECK-NEXT: [[MATRIXEXT11_US_3:%.*]] = load double, double* [[TMP139]], align 8 ; CHECK-NEXT: [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]] -; CHECK-NEXT: store double [[SUB_US_3]], double* [[TMP27]], align 8 +; CHECK-NEXT: store double [[SUB_US_3]], double* [[TMP139]], align 8 ; CHECK-NEXT: [[INC_US_3]] = add nuw nsw i32 [[K_013_US_3]], 1 ; CHECK-NEXT: [[CMP2_US_3:%.*]] = icmp ult i32 [[INC_US_3]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_3]], label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[CMP2_US_3]], label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP10]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ;