Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1875,7 +1875,24 @@
   // Need accesses with constant stride. We don't want to vectorize
   // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
   // the address space.
-  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
+  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+    bool SrcInvariant = SE.isLoopInvariant(Src, InnermostLoop);
+    bool SinkInvariant = SE.isLoopInvariant(Sink, InnermostLoop);
+
+    assert(!(StrideAPtr && SrcInvariant) && "Cannot be strided and invariant");
+    assert(!(StrideBPtr && SinkInvariant) && "Cannot be strided and invariant");
+
+    bool SrcAffine = !SrcInvariant && isa<SCEVAddRecExpr>(Src) &&
+                     cast<SCEVAddRecExpr>(Src)->isAffine();
+    bool SinkAffine = !SinkInvariant && isa<SCEVAddRecExpr>(Sink) &&
+                      cast<SCEVAddRecExpr>(Sink)->isAffine();
+
+    // Cases like A[B[i]] are still avoided, because of an unpredictable stride.
+    // We won't generate runtime checks because we won't be able to identify
+    // the bounds for the check.
+    if (APtr != BPtr && (SrcAffine || SinkAffine))
+      FoundNonConstantDistanceDependence = true;
+
     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
     return Dependence::Unknown;
   }
Index: llvm/test/Analysis/LoopAccessAnalysis/loop-invariant-dep-with-backedge-taken-count.ll
===================================================================
--- llvm/test/Analysis/LoopAccessAnalysis/loop-invariant-dep-with-backedge-taken-count.ll
+++ llvm/test/Analysis/LoopAccessAnalysis/loop-invariant-dep-with-backedge-taken-count.ll
@@ -6,15 +6,21 @@
 define void @test_distance_greater_than_BTC_100(ptr %a) {
 ; CHECK-LABEL: Loop access info in function 'test_distance_greater_than_BTC_100':
 ; CHECK-NEXT:   loop:
-; CHECK-NEXT:     Report: unsafe dependent memory operations in loop.
-; CHECK-NEXT:     Unknown data dependence.
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l = load i32, ptr %gep.x, align 4 -> -; CHECK-NEXT: store i32 %l, ptr %gep, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %gep = getelementptr i32, ptr %a, i32 %iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %gep.x = getelementptr i32, ptr %a, i32 100 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: %a High: (400 + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (400 + %a) High: (404 + %a)) +; CHECK-NEXT: Member: (400 + %a) ; CHECK-EMPTY: ; entry: @@ -37,15 +43,21 @@ define void @test_distance_much_greater_than_BTC_100(ptr %a) { ; CHECK-LABEL: Loop access info in function 'test_distance_much_greater_than_BTC_100': ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l = load i32, ptr %gep.x, align 4 -> -; CHECK-NEXT: store i32 %l, ptr %gep, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %gep = getelementptr i32, ptr %a, i32 %iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %gep.x = getelementptr i32, ptr %a, i32 200 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: %a High: (400 + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (800 + %a) High: (804 + %a)) +; CHECK-NEXT: Member: (800 + %a) ; CHECK-EMPTY: ; entry: @@ -68,15 +80,21 @@ define void @test_distance_equal_BTC_100(ptr %a) { ; CHECK-LABEL: Loop access info in function 'test_distance_equal_BTC_100': ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l = load i32, ptr %gep.x, align 4 -> -; CHECK-NEXT: store i32 %l, ptr %gep, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %gep = getelementptr i32, ptr %a, i32 %iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %gep.x = getelementptr i32, ptr %a, i32 99 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: %a High: (400 + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (396 + %a) High: (400 + %a)) +; CHECK-NEXT: Member: (396 + %a) ; CHECK-EMPTY: ; entry: @@ -99,16 +117,20 @@ define void @test_distance_greater_than_BTC_10000(ptr %a) { ; CHECK-LABEL: Loop access info in function 'test_distance_greater_than_BTC_10000': ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l = load i32, ptr %gep.x, align 4 -> -; CHECK-NEXT: store i32 %l, ptr %gep, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %gep = getelementptr i32, ptr %a, i32 %iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %gep.x = getelementptr i32, ptr %a, i32 10000 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: %a High: (40000 + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (40000 + %a) High: (40004 + %a)) ; entry: %gep.x = getelementptr i32, ptr %a, i32 10000 @@ -130,16 +152,21 @@ define void @test_distance_equal_to_BTC_10000(ptr %a) { ; CHECK-LABEL: Loop access info in function 'test_distance_equal_to_BTC_10000': ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l = load i32, ptr %gep.x, align 4 -> -; CHECK-NEXT: store i32 %l, ptr %gep, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %gep = getelementptr i32, ptr %a, i32 %iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %gep.x = getelementptr i32, ptr %a, i32 9999 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: %a High: (400000 + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (39996 + %a) High: (40000 + %a)) +; CHECK-NEXT: Member: (39996 + %a) ; entry: %gep.x = getelementptr i32, ptr %a, i32 9999 @@ -161,15 +188,21 @@ define void @test_btc_is_unknown_value(ptr %a, i32 %N) { ; CHECK-LABEL: Loop access info in function 'test_btc_is_unknown_value': ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l = load i32, ptr %gep.x, align 4 -> -; CHECK-NEXT: store i32 %l, ptr %gep, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %gep = getelementptr i32, ptr %a, i32 %iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %gep.x = getelementptr i32, ptr %a, i32 100 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: %a High: (4 + (4 * (zext i32 (-1 + %N) to i64)) + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (400 + %a) High: (404 + %a)) +; CHECK-NEXT: Member: (400 + %a) ; entry: %gep.x = getelementptr i32, ptr %a, i32 100 Index: llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll =================================================================== --- llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll +++ llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll @@ -126,12 +126,14 @@ define i32 @load_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'load_with_pointer_phi_outside_loop' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %ptr, align 8 -> -; CHECK-NEXT: store double %mul16, double* %arrayidx, align 8 +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ({{.*}}): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv +; CHECK-NEXT: Against group ({{.*}}): +; CHECK-NEXT: %ptr = phi double* [ %A, %if.then ], [ %ptr.select, %if.else ] ; entry: br i1 %c.0, label %if.then, label %if.else @@ -164,12 +166,14 @@ define i32 @store_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'store_with_pointer_phi_outside_loop' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> -; CHECK-NEXT: store double %mul16, double* %ptr, align 8 +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ({{.*}}): +; CHECK-NEXT: %ptr = phi double* [ %A, %if.then ], [ %ptr.select, %if.else ] +; CHECK-NEXT: Against group ({{.*}}): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv ; entry: br i1 %c.0, label %if.then, label %if.else @@ -202,13 +206,8 @@ define i32 @store_with_pointer_phi_incoming_phi(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> -; CHECK-NEXT: store double %mul16, double* %ptr.2, align 8 -; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]): @@ -219,14 +218,27 @@ ; CHECK-NEXT: Comparing group ([[GROUP_C]]): ; CHECK-NEXT: double* %C ; CHECK-NEXT: Against group ([[GROUP_A:.+]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv ; CHECK-NEXT: double* %A ; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group ([[GROUP_C]]): +; CHECK-NEXT: double* %C +; CHECK-NEXT: Against group ([[GROUP_A1:.+]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv +; CHECK-NEXT: Check 3: ; CHECK-NEXT: Comparing group ([[GROUP_B]]): ; CHECK-NEXT: double* %B ; CHECK-NEXT: Against group ([[GROUP_A]]): +; CHECK-NEXT: double* %A +; CHECK-NEXT: Check 4: +; CHECK-NEXT: Comparing group ([[GROUP_B]]): +; CHECK-NEXT: double* %B +; CHECK-NEXT: Against group ([[GROUP_A1]]): ; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv +; CHECK-NEXT: Check 5: +; CHECK-NEXT: Comparing group ([[GROUP_A]]): ; CHECK-NEXT: double* %A +; CHECK-NEXT: Against group ([[GROUP_A1]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group [[GROUP_C]]: ; CHECK-NEXT: (Low: %C High: (8 + %C)) @@ -235,9 +247,11 @@ ; CHECK-NEXT: (Low: %B High: (8 + %B)) ; CHECK-NEXT: Member: %B ; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: %A High: (8 + %A)) +; CHECK-NEXT: Member: %A +; CHECK-NEXT: Group [[GROUP_A1]]: ; CHECK-NEXT: (Low: %A High: (256000 + %A)) ; CHECK-NEXT: Member: {%A,+,8}<%loop.header> -; CHECK-NEXT: Member: %A ; CHECK-EMPTY entry: br label %loop.header @@ -279,13 +293,8 @@ define i32 
@store_with_pointer_phi_incoming_phi_irreducible_cycle(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi_irreducible_cycle' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> -; CHECK-NEXT: store double %mul16, double* %ptr.3, align 8 -; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]): @@ -296,25 +305,40 @@ ; CHECK-NEXT: Comparing group ([[GROUP_C]]): ; CHECK-NEXT: double* %C ; CHECK-NEXT: Against group ([[GROUP_A:.+]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv ; CHECK-NEXT: double* %A ; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group ([[GROUP_C]]): +; CHECK-NEXT: double* %C +; CHECK-NEXT: Against group ([[GROUP_A1:.+]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv +; CHECK-NEXT: Check 3: ; CHECK-NEXT: Comparing group ([[GROUP_B]]): ; CHECK-NEXT: double* %B ; CHECK-NEXT: Against group ([[GROUP_A]]): +; CHECK-NEXT: double* %A +; CHECK-NEXT: Check 4: +; CHECK-NEXT: Comparing group ([[GROUP_B]]): +; CHECK-NEXT: double* %B +; CHECK-NEXT: Against group ([[GROUP_A1]]): ; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv +; CHECK-NEXT: Check 5: +; CHECK-NEXT: Comparing group ([[GROUP_A]]): ; CHECK-NEXT: double* %A +; CHECK-NEXT: Against group ([[GROUP_A1]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv ; CHECK-NEXT: Grouped accesses: -; CHECK-NEXT: Group [[GROUP_C]] +; CHECK-NEXT: Group [[GROUP_C]]: ; CHECK-NEXT: (Low: %C High: 
(8 + %C))
 ; CHECK-NEXT:     Member: %C
-; CHECK-NEXT:   Group [[GROUP_B]]
+; CHECK-NEXT:   Group [[GROUP_B]]:
 ; CHECK-NEXT:     (Low: %B High: (8 + %B))
 ; CHECK-NEXT:     Member: %B
-; CHECK-NEXT:   Group [[GROUP_A]]
+; CHECK-NEXT:   Group [[GROUP_A]]:
+; CHECK-NEXT:     (Low: %A High: (8 + %A))
+; CHECK-NEXT:     Member: %A
+; CHECK-NEXT:   Group [[GROUP_A1]]:
 ; CHECK-NEXT:     (Low: %A High: (256000 + %A))
 ; CHECK-NEXT:     Member: {%A,+,8}<%loop.header>
-; CHECK-NEXT:     Member: %A
 ; CHECK-EMPTY
 entry:
   br label %loop.header
@@ -351,13 +375,22 @@
 define i32 @store_with_pointer_phi_outside_loop_select(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) {
 ; CHECK-LABEL: 'store_with_pointer_phi_outside_loop_select'
 ; CHECK-NEXT:   loop.header:
-; CHECK-NEXT:     Report: unsafe dependent memory operations in loop.
-; CHECK-NEXT:     Unknown data dependence.
+; CHECK-NEXT:     Memory dependences are safe with run-time checks
 ; CHECK-NEXT:   Dependences:
-; CHECK-NEXT:     Unknown:
-; CHECK-NEXT:         %v8 = load double, double* %arrayidx, align 8 ->
-; CHECK-NEXT:         store double %mul16, double* %ptr, align 8
-;
+; CHECK-NEXT:   Run-time memory checks:
+; CHECK-NEXT:    Check 0:
+; CHECK-NEXT:      Comparing group ({{.*}}):
+; CHECK-NEXT:          %ptr = phi double* [ %A, %if.then ], [ %ptr.select, %if.else ]
+; CHECK-NEXT:      Against group ({{.*}}):
+; CHECK-NEXT:          %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
+; CHECK-NEXT:   Grouped accesses:
+; CHECK-NEXT:    Group {{.*}}:
+; CHECK-NEXT:      (Low: %ptr High: (8 + %ptr))
+; CHECK-NEXT:        Member: %ptr
+; CHECK-NEXT:    Group {{.*}}:
+; CHECK-NEXT:      (Low: %A High: (256000 + %A))
+; CHECK-NEXT:        Member: {%A,+,8}<%loop.header>
+; CHECK-EMPTY
 
 entry:
   br i1 %c.0, label %if.then, label %if.else
Index: llvm/test/Analysis/LoopAccessAnalysis/runtime-check-invariant-and-affine.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/LoopAccessAnalysis/runtime-check-invariant-and-affine.ll
@@ -0,0 +1,190 @@
+; RUN: opt < %s -passes='print<access-info>' -disable-output 2>&1 | 
FileCheck %s +%struct.f = type { %struct.c, [0 x %struct.c] } +%struct.c = type { i16, i8 } +%struct.Arrays = type { [128 x double], [128 x double], [128 x double], double } + +@h = local_unnamed_addr global i32 0, align 4 +@g = local_unnamed_addr global %struct.f zeroinitializer, align 2 + +@a = local_unnamed_addr global i32 0, align 4 +@b = global [1 x i32] zeroinitializer, align 4 + +@n = local_unnamed_addr global i32 0, align 4 +@s1 = local_unnamed_addr global %struct.Arrays zeroinitializer, align 8 + +declare double @llvm.fmuladd.f64(double, double, double) #1 + +define void @two_sides_affine() { +; CHECK-LABEL: function 'two_sides_affine' +; CHECK-NEXT: for.body: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %arrayidx = getelementptr [0 x %struct.c], ptr getelementptr inbounds (%struct.f, ptr @g, i64 1, i32 0, i32 0), i64 0, i64 %indvars.iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %b = getelementptr [0 x %struct.c], ptr getelementptr inbounds (%struct.f, ptr @g, i64 1, i32 0, i32 0), i64 0, i64 %indvars.iv, i32 1 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: (4 + (4 * (sext i32 %.pr to i64)) + @g) High: (6 + (4 * (zext i32 (-1 + (-1 * %.pr)) to i64)) + (4 * (sext i32 %.pr to i64)) + @g)) +; CHECK-NEXT: Member: {(4 + (4 * (sext i32 %.pr to i64)) + @g),+,4}<%for.body> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (6 + (4 * (sext i32 %.pr to i64)) + @g) High: (7 + (4 * (zext i32 (-1 + (-1 * %.pr)) to i64)) + (4 * (sext i32 %.pr to i64)) + @g)) +; CHECK-NEXT: Member: {(6 + (4 * (sext i32 %.pr to i64)) + @g),+,4}<%for.body> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: {(4 + (4 * (sext i32 %.pr to i64)) + @g),+,4}<%for.body> Added Flags: +; CHECK-NEXT: {(6 + (4 * (sext i32 %.pr to i64)) + @g),+,4}<%for.body> Added Flags: +; CHECK-EMPTY: + +entry: + %.pr = load i32, ptr @h, align 4 + %tobool.not2 = icmp eq i32 %.pr, 0 + br i1 %tobool.not2, label %for.end, label %for.body.preheader + +for.body.preheader: + %0 = sext i32 %.pr to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr [0 x %struct.c], ptr getelementptr (%struct.f, ptr @g, i64 1, i32 0, i32 0), i64 0, i64 %indvars.iv + %b = getelementptr [0 x %struct.c], ptr getelementptr (%struct.f, ptr @g, i64 1, i32 0, i32 0), i64 0, i64 %indvars.iv, i32 1 + %1 = load i8, ptr %b, align 2 + %conv = zext i8 %1 to i16 + store i16 %conv, ptr %arrayidx, align 2 + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %2 = and i64 %indvars.iv.next, 4294967295 + %tobool.not = icmp eq i64 %2, 0 + br i1 %tobool.not, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: + store i32 0, ptr @h, align 4 + br label %for.end + +for.end: + ret void +} + +define void @srcaffine_sinkinvariant() { +; CHECK-LABEL: function 'srcaffine_sinkinvariant' +; CHECK-NEXT: for.body: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: @b = global [1 x i32] zeroinitializer, align 4 +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: %arrayidx = getelementptr [1 x i32], ptr @b, i64 0, i64 %indvars.iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: @b High: (4 + @b)) +; CHECK-NEXT: Member: @b +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: ((4 * (sext i32 %.pr to i64)) + @b) High: (4 + (4 * (zext i32 (-1 + (-1 * %.pr)) to i64)) + (4 * 
(sext i32 %.pr to i64)) + @b)) +; CHECK-NEXT: Member: {((4 * (sext i32 %.pr to i64)) + @b),+,4}<%for.body> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +entry: + %.pr = load i32, i32* @a, align 4 + %tobool.not2 = icmp eq i32 %.pr, 0 + br i1 %tobool.not2, label %for.end, label %for.body.preheader + +for.body.preheader: + %0 = sext i32 %.pr to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr [1 x i32], [1 x i32]* @b, i64 0, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %tobool1.not = icmp eq i32 %1, 0 + br i1 %tobool1.not, label %for.inc, label %if.then + +if.then: + store i32 ptrtoint ([1 x i32]* @b to i32), i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 0, i64 0), align 4 + br label %for.inc + +for.inc: + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %2 = trunc i64 %indvars.iv.next to i32 + %tobool.not = icmp eq i32 %2, 0 + br i1 %tobool.not, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: + store i32 0, i32* @a, align 4 + br label %for.end + +for.end: + ret void +} + +define void @srcinvariant_sinkaffine() { +; CHECK-LABEL: function 'srcinvariant_sinkaffine' +; CHECK-NEXT: for.body: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]): +; CHECK-NEXT: %arrayidx4 = getelementptr %struct.Arrays, ptr @s1, i64 0, i32 0, i64 %indvars.iv +; CHECK-NEXT: Against group ([[GROUP_B:.+]]): +; CHECK-NEXT: ptr getelementptr inbounds (%struct.Arrays, ptr @s1, i64 0, i32 3) +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group ([[GROUP_A]]): +; CHECK-NEXT: %arrayidx4 = getelementptr %struct.Arrays, ptr @s1, i64 0, i32 0, i64 %indvars.iv +; CHECK-NEXT: Against group 
([[GROUP_C:.+]]): +; CHECK-NEXT: %arrayidx = getelementptr %struct.Arrays, ptr @s1, i64 0, i32 1, i64 %indvars.iv +; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group ([[GROUP_A]]): +; CHECK-NEXT: %arrayidx4 = getelementptr %struct.Arrays, ptr @s1, i64 0, i32 0, i64 %indvars.iv +; CHECK-NEXT: Against group ([[GROUP_D:.+]]): +; CHECK-NEXT: %arrayidx2 = getelementptr %struct.Arrays, ptr @s1, i64 0, i32 2, i64 %indvars.iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GROUP_A]]: +; CHECK-NEXT: (Low: @s1 High: ((8 * (zext i32 %0 to i64)) + @s1)) +; CHECK-NEXT: Member: {@s1,+,8}<%for.body> +; CHECK-NEXT: Group [[GROUP_B]]: +; CHECK-NEXT: (Low: (3072 + @s1) High: (3080 + @s1)) +; CHECK-NEXT: Member: (3072 + @s1) +; CHECK-NEXT: Group [[GROUP_C]]: +; CHECK-NEXT: (Low: (1024 + @s1) High: (1024 + (8 * (zext i32 %0 to i64)) + @s1)) +; CHECK-NEXT: Member: {(1024 + @s1),+,8}<%for.body> +; CHECK-NEXT: Group [[GROUP_D]]: +; CHECK-NEXT: (Low: (2048 + @s1) High: (2048 + (8 * (zext i32 %0 to i64)) + @s1)) +; CHECK-NEXT: Member: {(2048 + @s1),+,8}<%for.body> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: + +entry: + %0 = load i32, i32* @n, align 4 + %cmp10 = icmp sgt i32 %0, 0 + br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext i32 %0 to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %1 = load double, double* getelementptr (%struct.Arrays, %struct.Arrays* @s1, i64 0, i32 3), align 8 + %arrayidx = getelementptr %struct.Arrays, %struct.Arrays* @s1, i64 0, i32 1, i64 %indvars.iv + %2 = load double, double* %arrayidx, align 8 + %arrayidx2 = getelementptr %struct.Arrays, %struct.Arrays* @s1, i64 0, i32 2, i64 %indvars.iv + %3 = load double, double* %arrayidx2, align 8 + %4 = tail call double @llvm.fmuladd.f64(double %2, double %3, double %1) + %arrayidx4 = getelementptr %struct.Arrays, %struct.Arrays* @s1, i64 0, i32 0, i64 %indvars.iv + store double %4, double* %arrayidx4, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} Index: llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll =================================================================== --- llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll +++ llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll @@ -9,85 +9,30 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A5:%.*]] = bitcast i32* [[A:%.*]] to i8* -; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]] -; CHECK: for.body.lver.check: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) -; CHECK-NEXT: 
[[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP7]], [[TMP17]] -; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] -; CHECK: for.body.ph.lver.orig: -; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] -; CHECK: for.body.lver.orig: -; CHECK-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 -; CHECK-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXA_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXB_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]] -; CHECK-NEXT: [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 -; CHECK-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LVER_ORIG]] -; CHECK-NEXT: store i32 [[MULA_LVER_ORIG]], i32* [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* 
[[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADD_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXD_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADE_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXE_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]] -; CHECK-NEXT: [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: store i32 [[MULC_LVER_ORIG]], i32* [[ARRAYIDXC_LVER_ORIG]], align 4 -; CHECK-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] -; CHECK: for.body.ph.ldist1: -; CHECK-NEXT: br label [[FOR_BODY_LDIST1:%.*]] -; CHECK: for.body.ldist1: -; CHECK-NEXT: [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2 -; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0 -; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4 -; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]] -; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1 -; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]] -; 
CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !3 -; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]] -; CHECK: for.body.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 ; CHECK-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64 +; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADA:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4 +; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADB:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4 +; CHECK-NEXT: [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]] ; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1 ; CHECK-NEXT: [[INC1]] = add i32 [[IND1]], 1 -; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD]] +; CHECK-NEXT: store i32 [[MULA]], i32* [[ARRAYIDXA_PLUS_4]], align 4 +; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4 -; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4 ; CHECK-NEXT: [[MULC:%.*]] = 
mul i32 [[LOADD]], [[LOADE]] -; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end.loopexit3: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -144,84 +89,30 @@ ; CHECK-NEXT: [[A_INTPTR:%.*]] = ptrtoint i32* [[A_BASE]] to i64 ; CHECK-NEXT: call void @use64(i64 [[A_INTPTR]]) ; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[A_BASE]], i32 42 -; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]] -; CHECK: for.body.lver.check: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP7]], [[TMP17]] -; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label 
[[FOR_BODY_PH_LDIST1:%.*]] -; CHECK: for.body.ph.lver.orig: -; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] -; CHECK: for.body.lver.orig: -; CHECK-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 -; CHECK-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXA_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXB_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]] -; CHECK-NEXT: [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 -; CHECK-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LVER_ORIG]] -; CHECK-NEXT: store i32 [[MULA_LVER_ORIG]], i32* [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADD_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXD_LVER_ORIG]], align 4 -; CHECK-NEXT: [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]] -; CHECK-NEXT: [[LOADE_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXE_LVER_ORIG]], align 4 -; CHECK-NEXT: [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]] -; CHECK-NEXT: [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]] 
-; CHECK-NEXT: store i32 [[MULC_LVER_ORIG]], i32* [[ARRAYIDXC_LVER_ORIG]], align 4 -; CHECK-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] -; CHECK: for.body.ph.ldist1: -; CHECK-NEXT: br label [[FOR_BODY_LDIST1:%.*]] -; CHECK: for.body.ldist1: -; CHECK-NEXT: [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ] -; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2 -; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !5 -; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4 -; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]] -; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1 -; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1 -; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]] -; CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !8 -; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]] -; CHECK: for.body.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] +; 
CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 ; CHECK-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64 +; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADA:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4 +; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[LOADB:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4 +; CHECK-NEXT: [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]] ; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1 ; CHECK-NEXT: [[INC1]] = add i32 [[IND1]], 1 -; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD]] +; CHECK-NEXT: store i32 [[MULA]], i32* [[ARRAYIDXA_PLUS_4]], align 4 +; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4 -; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4 ; CHECK-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]] -; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]] +; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT]] ; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end.loopexit2: -; CHECK-NEXT: br 
label [[FOR_END]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -151,34 +151,98 @@ ; CHECK-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[M:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP27]], label [[FOR_BODY3_LR_PH_US_PREHEADER:%.*]], label [[FOR_END15:%.*]] ; CHECK: for.body3.lr.ph.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[K:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[K]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV33:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX9_US]], align 4 -; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX9_US]], align 4 +; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP7]], 3 ; CHECK-NEXT: store i32 [[ADD10_US]], i32* [[ARRAYIDX9_US]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 ; CHECK-NEXT: [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]] ; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label 
[[FOR_BODY3_LR_PH_US]], !llvm.loop [[LOOP2]] ; CHECK: for.body3.us: -; CHECK-NEXT: [[INDVARS_IV29:%.*]] = phi i64 [ 0, [[FOR_BODY3_LR_PH_US]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 -; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP1]] +; CHECK-NEXT: [[INDVARS_IV29:%.*]] = phi i64 [ [[BC_RESUME_VAL:%.*]], [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 +; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP8]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD4_US]] to i64 -; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4 -; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM_US]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP9]], 1 ; CHECK-NEXT: store i32 [[ADD5_US]], i32* [[ARRAYIDX7_US:%.*]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 ; CHECK-NEXT: [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP1:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: for.body3.lr.ph.us: ; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 -; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP3]], [[K:%.*]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 [[INDVARS_IV33]] +; 
CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDVARS_IV33]], 1 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP10]] +; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast i32* [[SCEVGEP2]] to i8* +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP2]], [[INDVARS_IV33]] +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[SCEVGEP6]], i64 [[TMP13]] +; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8* +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]] +; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 +; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP16]], [[K]] ; CHECK-NEXT: [[ARRAYIDX7_US]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV33]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], [[TMP0]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp slt i32 [[TMP17]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP78]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; 
CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[ADD_US]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[TMP21]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP25]], align 4, !alias.scope !7 +; CHECK-NEXT: [[TMP26:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP26]], i32 0 +; CHECK-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4, !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i32 1 +; CHECK-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX7_US]], align 4, !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP26]], i32 2 +; CHECK-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX7_US]], align 4, !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP26]], i32 3 +; CHECK-NEXT: store i32 [[TMP30]], i32* [[ARRAYIDX7_US]], align 4, !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, 
[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY3_US]] ; CHECK: for.end15.loopexit: ; CHECK-NEXT: br label [[FOR_END15]] Index: llvm/test/Transforms/LoopVectorize/global_alias.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/global_alias.ll +++ llvm/test/Transforms/LoopVectorize/global_alias.ll @@ -777,7 +777,7 @@ ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @mayAlias01( -; CHECK-NOT: add nsw <4 x i32> +; CHECK: add nsw <4 x i32> ; CHECK: ret define i32 @mayAlias01(i32 %a) nounwind { @@ -827,7 +827,7 @@ ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @mayAlias02( -; CHECK-NOT: add nsw <4 x i32> +; CHECK: add nsw <4 x i32> ; CHECK: ret define i32 @mayAlias02(i32 %a) nounwind { Index: llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll +++ llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll @@ -268,9 +268,6 @@ ; } ; } -; CHECK: remark: source.c:83:7: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK: Unknown data dependence. Memory location is the same as accessed at source.c:82:7 - define void @test_unknown_dep(i64 %n, i32* nocapture %A) !dbg !214 { entry: %cmp8 = icmp sgt i64 %n, 0 @@ -368,25 +365,19 @@ ; YAML-NEXT: Args: ; YAML-NEXT: - String: loop not vectorized ; YAML-NEXT: ... -; YAML-NEXT: --- !Analysis +; YAML-NEXT: --- !Missed ; YAML-NEXT: Pass: loop-vectorize -; YAML-NEXT: Name: UnsafeDep -; YAML-NEXT: DebugLoc: { File: source.c, Line: 83, Column: 7 } +; YAML-NEXT: Name: VectorizationNotBeneficial ; YAML-NEXT: Function: test_unknown_dep ; YAML-NEXT: Args: -; YAML-NEXT: - String: 'loop not vectorized: ' -; YAML-NEXT: - String: 'unsafe dependent memory operations in loop. 
Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop' -; YAML-NEXT: - String: "\nUnknown data dependence." -; YAML-NEXT: - String: ' Memory location is the same as accessed at ' -; YAML-NEXT: - Location: 'source.c:82:7' -; YAML-NEXT: DebugLoc: { File: source.c, Line: 82, Column: 7 } +; YAML-NEXT: - String: the cost-model indicates that vectorization is not beneficial ; YAML-NEXT: ... ; YAML-NEXT: --- !Missed ; YAML-NEXT: Pass: loop-vectorize -; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: Name: InterleavingNotBeneficial ; YAML-NEXT: Function: test_unknown_dep ; YAML-NEXT: Args: -; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: - String: the cost-model indicates that interleaving is not beneficial ; YAML-NEXT: ... Index: llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll +++ llvm/test/Transforms/LoopVectorize/vectorize-pointer-phis.ll @@ -109,7 +109,8 @@ define i32 @load_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: @load_with_pointer_phi_outside_loop -; CHECK-NOT: vector.body +; CHECK: vector.body +; CHECK: memcheck ; entry: br i1 %c.0, label %if.then, label %if.else @@ -141,7 +142,8 @@ define i32 @store_with_pointer_phi_outside_loop(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: @store_with_pointer_phi_outside_loop -; CHECK-NOT: vector.body +; CHECK: vector.body +; CHECK: memcheck ; entry: br i1 %c.0, label %if.then, label %if.else Index: llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -92,97 +92,418 @@ ; CHECK-NEXT: [[TMP0:%.*]] = 
bitcast [225 x double]* [[A:%.*]] to <225 x double>* ; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>* -; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] -; CHECK: for.cond1.preheader.us: -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[I]], 225 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV6]] +; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] +; CHECK: for.cond1.preheader.us.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[CONV6]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[I]], 225 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV6]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I]], 6 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP25:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP5]] +; CHECK-NEXT: [[SCEVGEP23:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[CONV6]] +; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SCEVGEP19:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 0 +; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 0 +; 
CHECK-NEXT: [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[SCEVGEP21]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult double* [[SCEVGEP19]], [[SCEVGEP17]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND027:%.*]] = icmp ult double* [[SCEVGEP]], [[SCEVGEP25]] +; CHECK-NEXT: [[BOUND128:%.*]] = icmp ult double* [[SCEVGEP23]], [[SCEVGEP17]] +; CHECK-NEXT: [[FOUND_CONFLICT29:%.*]] = and i1 [[BOUND027]], [[BOUND128]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT29]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY4_US_PREHEADER]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 8, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT31:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT32:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT31]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i64> poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i64> [[TMP16]], i64 [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP15]], i64 1 +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <2 x i64> [[TMP17]], +; 
CHECK-NEXT: [[TMP21:%.*]] = icmp ult <2 x i64> [[TMP19]], +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP20]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]]) +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP20]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]]) +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP21]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP24]]) +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP21]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP25]]) +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP12]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP27]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 2 +; CHECK-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x double>, <2 x double>* [[TMP29]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP30:%.*]] = fmul <2 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT32]] +; CHECK-NEXT: [[TMP31:%.*]] = fmul <2 x double> [[WIDE_LOAD30]], [[BROADCAST_SPLAT32]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP12]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x double>, <2 x double>* [[TMP33]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 2 +; CHECK-NEXT: [[TMP35:%.*]] = bitcast double* [[TMP34]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x double>, <2 x double>* [[TMP35]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP36:%.*]] = fsub <2 x double> [[WIDE_LOAD33]], [[TMP30]] +; CHECK-NEXT: 
[[TMP37:%.*]] = fsub <2 x double> [[WIDE_LOAD34]], [[TMP31]] +; CHECK-NEXT: [[TMP38:%.*]] = bitcast double* [[TMP32]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP36]], <2 x double>* [[TMP38]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP34]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP37]], <2 x double>* [[TMP39]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]], label [[FOR_BODY4_US_PREHEADER]] +; CHECK: for.body4.us.preheader: +; CHECK-NEXT: [[K_013_US_PH:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[K_013_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] +; CHECK-NEXT: [[K_013_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY4_US]] ], [ [[K_013_US_PH]], [[FOR_BODY4_US_PREHEADER]] ] ; CHECK-NEXT: [[CONV_US:%.*]] = zext i32 [[K_013_US]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[K_013_US]], 225 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[CONV_US]] -; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, double* [[TMP5]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, double* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP41:%.*]] = icmp ult i32 [[K_013_US]], 225 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds <225 x 
double>, <225 x double>* [[TMP0]], i64 0, i64 [[CONV_US]] +; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, double* [[TMP42]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, double* [[TMP7]], align 8 ; CHECK-NEXT: [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV_US]] -; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, double* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV_US]] +; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, double* [[TMP43]], align 8 ; CHECK-NEXT: [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]] -; CHECK-NEXT: store double [[SUB_US]], double* [[TMP6]], align 8 +; CHECK-NEXT: store double [[SUB_US]], double* [[TMP43]], align 8 ; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[K_013_US]], 1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i32 [[INC_US]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] +; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[CONV6]], 15 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[I]], 210 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP8]]) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP7]] +; CHECK-NEXT: [[TMP44:%.*]] = add nuw nsw i64 [[CONV6]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = icmp ult i32 [[I]], 210 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP44]] +; CHECK-NEXT: [[MIN_ITERS_CHECK_1:%.*]] = icmp ult i32 [[I]], 6 +; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK_1]], label [[FOR_BODY4_US_PREHEADER_1:%.*]], label [[VECTOR_MEMCHECK_1:%.*]] +; CHECK: vector.memcheck.1: +; CHECK-NEXT: [[TMP47:%.*]] = add nuw nsw i64 [[CONV6]], 16 +; CHECK-NEXT: [[SCEVGEP25_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = add nuw nsw i64 [[CONV6]], 15 +; CHECK-NEXT: [[SCEVGEP23_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP48]] +; CHECK-NEXT: [[TMP49:%.*]] = add nuw nsw i64 [[TMP3]], 16 +; CHECK-NEXT: [[SCEVGEP21_1:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP49]] +; CHECK-NEXT: [[SCEVGEP19_1:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 15 +; CHECK-NEXT: [[SCEVGEP17_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP49]] +; CHECK-NEXT: [[SCEVGEP_1:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 15 +; CHECK-NEXT: [[BOUND0_1:%.*]] = icmp ult double* [[SCEVGEP_1]], [[SCEVGEP21_1]] +; CHECK-NEXT: [[BOUND1_1:%.*]] = icmp ult double* [[SCEVGEP19_1]], [[SCEVGEP17_1]] +; CHECK-NEXT: [[FOUND_CONFLICT_1:%.*]] = and i1 [[BOUND0_1]], [[BOUND1_1]] +; CHECK-NEXT: [[BOUND027_1:%.*]] = icmp ult double* [[SCEVGEP_1]], [[SCEVGEP25_1]] +; CHECK-NEXT: [[BOUND128_1:%.*]] = icmp ult double* [[SCEVGEP23_1]], [[SCEVGEP17_1]] +; CHECK-NEXT: [[FOUND_CONFLICT29_1:%.*]] = and i1 [[BOUND027_1]], [[BOUND128_1]] +; CHECK-NEXT: [[CONFLICT_RDX_1:%.*]] = or i1 [[FOUND_CONFLICT_1]], [[FOUND_CONFLICT29_1]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX_1]], label [[FOR_BODY4_US_PREHEADER_1]], label [[VECTOR_PH_1:%.*]] +; CHECK: vector.ph.1: +; CHECK-NEXT: [[N_VEC_1:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP50:%.*]] = load double, double* [[TMP46]], align 8, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT31_1:%.*]] = insertelement <2 x double> poison, double [[TMP50]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT32_1:%.*]] = shufflevector <2 x double> 
[[BROADCAST_SPLATINSERT31_1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY_1:%.*]] +; CHECK: vector.body.1: +; CHECK-NEXT: [[INDEX_1:%.*]] = phi i32 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[INDEX_1]], 1 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[INDEX_1]], 2 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[INDEX_1]], 3 +; CHECK-NEXT: [[TMP54:%.*]] = zext i32 [[INDEX_1]] to i64 +; CHECK-NEXT: [[TMP55:%.*]] = zext i32 [[TMP51]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP52]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP53]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = add nuw nsw i64 [[TMP54]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = add nuw nsw i64 [[TMP55]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <2 x i64> poison, i64 [[TMP58]], i64 0 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <2 x i64> [[TMP60]], i64 [[TMP59]], i64 1 +; CHECK-NEXT: [[TMP62:%.*]] = add nuw nsw i64 [[TMP56]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = add nuw nsw i64 [[TMP57]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <2 x i64> poison, i64 [[TMP62]], i64 0 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <2 x i64> [[TMP64]], i64 [[TMP63]], i64 1 +; CHECK-NEXT: [[TMP66:%.*]] = icmp ult <2 x i64> [[TMP61]], +; CHECK-NEXT: [[TMP67:%.*]] = icmp ult <2 x i64> [[TMP65]], +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <2 x i1> [[TMP66]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP68]]) +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <2 x i1> [[TMP66]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP69]]) +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <2 x i1> [[TMP67]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP70]]) +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <2 x i1> [[TMP67]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP71]]) +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP58]] +; 
CHECK-NEXT: [[TMP73:%.*]] = bitcast double* [[TMP72]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, <2 x double>* [[TMP73]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds double, double* [[TMP72]], i64 2 +; CHECK-NEXT: [[TMP75:%.*]] = bitcast double* [[TMP74]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD30_1:%.*]] = load <2 x double>, <2 x double>* [[TMP75]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP76:%.*]] = fmul <2 x double> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT32_1]] +; CHECK-NEXT: [[TMP77:%.*]] = fmul <2 x double> [[WIDE_LOAD30_1]], [[BROADCAST_SPLAT32_1]] +; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP58]] +; CHECK-NEXT: [[TMP79:%.*]] = bitcast double* [[TMP78]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD33_1:%.*]] = load <2 x double>, <2 x double>* [[TMP79]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds double, double* [[TMP78]], i64 2 +; CHECK-NEXT: [[TMP81:%.*]] = bitcast double* [[TMP80]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34_1:%.*]] = load <2 x double>, <2 x double>* [[TMP81]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP82:%.*]] = fsub <2 x double> [[WIDE_LOAD33_1]], [[TMP76]] +; CHECK-NEXT: [[TMP83:%.*]] = fsub <2 x double> [[WIDE_LOAD34_1]], [[TMP77]] +; CHECK-NEXT: [[TMP84:%.*]] = bitcast double* [[TMP78]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP82]], <2 x double>* [[TMP84]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP85:%.*]] = bitcast double* [[TMP80]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP83]], <2 x double>* [[TMP85]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT_1]] = add nuw i32 [[INDEX_1]], 4 +; CHECK-NEXT: [[TMP86:%.*]] = icmp eq i32 [[INDEX_NEXT_1]], [[N_VEC_1]] +; CHECK-NEXT: br i1 [[TMP86]], label [[MIDDLE_BLOCK_1:%.*]], label [[VECTOR_BODY_1]], !llvm.loop [[LOOP8]] +; CHECK: 
middle.block.1: +; CHECK-NEXT: [[CMP_N_1:%.*]] = icmp eq i32 [[N_VEC_1]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]], label [[FOR_BODY4_US_PREHEADER_1]] +; CHECK: for.body4.us.preheader.1: +; CHECK-NEXT: [[K_013_US_PH_1:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK_1]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[N_VEC_1]], [[MIDDLE_BLOCK_1]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]] ; CHECK: for.body4.us.1: -; CHECK-NEXT: [[K_013_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ] +; CHECK-NEXT: [[K_013_US_1:%.*]] = phi i32 [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ], [ [[K_013_US_PH_1]], [[FOR_BODY4_US_PREHEADER_1]] ] ; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[K_013_US_1]], 15 -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[NARROW]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[K_013_US_1]], 210 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP10]] -; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, double* [[TMP12]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_1:%.*]] = load double, double* [[TMP9]], align 8 +; CHECK-NEXT: [[TMP87:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[TMP88:%.*]] = icmp ult i32 [[K_013_US_1]], 210 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP88]]) +; CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP87]] +; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, double* [[TMP89]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US_1:%.*]] = load double, double* [[TMP46]], align 8 ; CHECK-NEXT: [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP10]] -; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, double* 
[[TMP13]], align 8 +; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP87]] +; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, double* [[TMP90]], align 8 ; CHECK-NEXT: [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]] -; CHECK-NEXT: store double [[SUB_US_1]], double* [[TMP13]], align 8 +; CHECK-NEXT: store double [[SUB_US_1]], double* [[TMP90]], align 8 ; CHECK-NEXT: [[INC_US_1]] = add nuw nsw i32 [[K_013_US_1]], 1 ; CHECK-NEXT: [[CMP2_US_1:%.*]] = icmp ult i32 [[INC_US_1]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_1]], label [[FOR_BODY4_US_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]] +; CHECK-NEXT: br i1 [[CMP2_US_1]], label [[FOR_BODY4_US_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]], !llvm.loop [[LOOP10]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1: -; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[CONV6]], 30 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i32 [[I]], 195 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP14]] +; CHECK-NEXT: [[TMP91:%.*]] = add nuw nsw i64 [[CONV6]], 30 +; CHECK-NEXT: [[TMP92:%.*]] = icmp ult i32 [[I]], 195 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP92]]) +; CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP91]] +; CHECK-NEXT: [[MIN_ITERS_CHECK_2:%.*]] = icmp ult i32 [[I]], 6 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_2]], label [[FOR_BODY4_US_PREHEADER_2:%.*]], label [[VECTOR_MEMCHECK_2:%.*]] +; CHECK: vector.memcheck.2: +; CHECK-NEXT: [[TMP94:%.*]] = add nuw nsw i64 [[CONV6]], 31 +; CHECK-NEXT: [[SCEVGEP25_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP94]] +; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[CONV6]], 30 +; CHECK-NEXT: [[SCEVGEP23_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 
[[TMP95]] +; CHECK-NEXT: [[TMP96:%.*]] = add nuw nsw i64 [[TMP3]], 31 +; CHECK-NEXT: [[SCEVGEP21_2:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP96]] +; CHECK-NEXT: [[SCEVGEP19_2:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 30 +; CHECK-NEXT: [[SCEVGEP17_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP96]] +; CHECK-NEXT: [[SCEVGEP_2:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 30 +; CHECK-NEXT: [[BOUND0_2:%.*]] = icmp ult double* [[SCEVGEP_2]], [[SCEVGEP21_2]] +; CHECK-NEXT: [[BOUND1_2:%.*]] = icmp ult double* [[SCEVGEP19_2]], [[SCEVGEP17_2]] +; CHECK-NEXT: [[FOUND_CONFLICT_2:%.*]] = and i1 [[BOUND0_2]], [[BOUND1_2]] +; CHECK-NEXT: [[BOUND027_2:%.*]] = icmp ult double* [[SCEVGEP_2]], [[SCEVGEP25_2]] +; CHECK-NEXT: [[BOUND128_2:%.*]] = icmp ult double* [[SCEVGEP23_2]], [[SCEVGEP17_2]] +; CHECK-NEXT: [[FOUND_CONFLICT29_2:%.*]] = and i1 [[BOUND027_2]], [[BOUND128_2]] +; CHECK-NEXT: [[CONFLICT_RDX_2:%.*]] = or i1 [[FOUND_CONFLICT_2]], [[FOUND_CONFLICT29_2]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX_2]], label [[FOR_BODY4_US_PREHEADER_2]], label [[VECTOR_PH_2:%.*]] +; CHECK: vector.ph.2: +; CHECK-NEXT: [[N_VEC_2:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP97:%.*]] = load double, double* [[TMP93]], align 8, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT31_2:%.*]] = insertelement <2 x double> poison, double [[TMP97]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT32_2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT31_2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY_2:%.*]] +; CHECK: vector.body.2: +; CHECK-NEXT: [[INDEX_2:%.*]] = phi i32 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] +; CHECK-NEXT: [[TMP98:%.*]] = or i32 [[INDEX_2]], 1 +; CHECK-NEXT: [[TMP99:%.*]] = or i32 [[INDEX_2]], 2 +; CHECK-NEXT: [[TMP100:%.*]] = or i32 [[INDEX_2]], 3 +; CHECK-NEXT: [[TMP101:%.*]] = zext i32 
[[INDEX_2]] to i64 +; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP98]] to i64 +; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP99]] to i64 +; CHECK-NEXT: [[TMP104:%.*]] = zext i32 [[TMP100]] to i64 +; CHECK-NEXT: [[TMP105:%.*]] = add nuw nsw i64 [[TMP101]], 30 +; CHECK-NEXT: [[TMP106:%.*]] = add nuw nsw i64 [[TMP102]], 30 +; CHECK-NEXT: [[TMP107:%.*]] = insertelement <2 x i64> poison, i64 [[TMP105]], i64 0 +; CHECK-NEXT: [[TMP108:%.*]] = insertelement <2 x i64> [[TMP107]], i64 [[TMP106]], i64 1 +; CHECK-NEXT: [[TMP109:%.*]] = add nuw nsw i64 [[TMP103]], 30 +; CHECK-NEXT: [[TMP110:%.*]] = add nuw nsw i64 [[TMP104]], 30 +; CHECK-NEXT: [[TMP111:%.*]] = insertelement <2 x i64> poison, i64 [[TMP109]], i64 0 +; CHECK-NEXT: [[TMP112:%.*]] = insertelement <2 x i64> [[TMP111]], i64 [[TMP110]], i64 1 +; CHECK-NEXT: [[TMP113:%.*]] = icmp ult <2 x i64> [[TMP108]], +; CHECK-NEXT: [[TMP114:%.*]] = icmp ult <2 x i64> [[TMP112]], +; CHECK-NEXT: [[TMP115:%.*]] = extractelement <2 x i1> [[TMP113]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP115]]) +; CHECK-NEXT: [[TMP116:%.*]] = extractelement <2 x i1> [[TMP113]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP116]]) +; CHECK-NEXT: [[TMP117:%.*]] = extractelement <2 x i1> [[TMP114]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP117]]) +; CHECK-NEXT: [[TMP118:%.*]] = extractelement <2 x i1> [[TMP114]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP118]]) +; CHECK-NEXT: [[TMP119:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP105]] +; CHECK-NEXT: [[TMP120:%.*]] = bitcast double* [[TMP119]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, <2 x double>* [[TMP120]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP121:%.*]] = getelementptr inbounds double, double* [[TMP119]], i64 2 +; CHECK-NEXT: [[TMP122:%.*]] = bitcast double* [[TMP121]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD30_2:%.*]] = load <2 x double>, <2 x double>* 
[[TMP122]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP123:%.*]] = fmul <2 x double> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT32_2]] +; CHECK-NEXT: [[TMP124:%.*]] = fmul <2 x double> [[WIDE_LOAD30_2]], [[BROADCAST_SPLAT32_2]] +; CHECK-NEXT: [[TMP125:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP105]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast double* [[TMP125]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD33_2:%.*]] = load <2 x double>, <2 x double>* [[TMP126]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds double, double* [[TMP125]], i64 2 +; CHECK-NEXT: [[TMP128:%.*]] = bitcast double* [[TMP127]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34_2:%.*]] = load <2 x double>, <2 x double>* [[TMP128]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP129:%.*]] = fsub <2 x double> [[WIDE_LOAD33_2]], [[TMP123]] +; CHECK-NEXT: [[TMP130:%.*]] = fsub <2 x double> [[WIDE_LOAD34_2]], [[TMP124]] +; CHECK-NEXT: [[TMP131:%.*]] = bitcast double* [[TMP125]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP129]], <2 x double>* [[TMP131]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP132:%.*]] = bitcast double* [[TMP127]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP130]], <2 x double>* [[TMP132]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT_2]] = add nuw i32 [[INDEX_2]], 4 +; CHECK-NEXT: [[TMP133:%.*]] = icmp eq i32 [[INDEX_NEXT_2]], [[N_VEC_2]] +; CHECK-NEXT: br i1 [[TMP133]], label [[MIDDLE_BLOCK_2:%.*]], label [[VECTOR_BODY_2]], !llvm.loop [[LOOP8]] +; CHECK: middle.block.2: +; CHECK-NEXT: [[CMP_N_2:%.*]] = icmp eq i32 [[N_VEC_2]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]], label [[FOR_BODY4_US_PREHEADER_2]] +; CHECK: for.body4.us.preheader.2: +; CHECK-NEXT: [[K_013_US_PH_2:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK_2]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ 
[[N_VEC_2]], [[MIDDLE_BLOCK_2]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]] ; CHECK: for.body4.us.2: -; CHECK-NEXT: [[K_013_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ] -; CHECK-NEXT: [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30 -; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[NARROW17]] to i64 -; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[K_013_US_2]], 195 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]]) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, double* [[TMP19]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_2:%.*]] = load double, double* [[TMP16]], align 8 +; CHECK-NEXT: [[K_013_US_2:%.*]] = phi i32 [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ], [ [[K_013_US_PH_2]], [[FOR_BODY4_US_PREHEADER_2]] ] +; CHECK-NEXT: [[NARROW35:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30 +; CHECK-NEXT: [[TMP134:%.*]] = zext i32 [[NARROW35]] to i64 +; CHECK-NEXT: [[TMP135:%.*]] = icmp ult i32 [[K_013_US_2]], 195 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP135]]) +; CHECK-NEXT: [[TMP136:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP134]] +; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, double* [[TMP136]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US_2:%.*]] = load double, double* [[TMP93]], align 8 ; CHECK-NEXT: [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], [[MATRIXEXT8_US_2]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, double* [[TMP20]], align 8 +; CHECK-NEXT: [[TMP137:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP134]] +; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, double* [[TMP137]], align 8 ; CHECK-NEXT: [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], 
[[MUL_US_2]] -; CHECK-NEXT: store double [[SUB_US_2]], double* [[TMP20]], align 8 +; CHECK-NEXT: store double [[SUB_US_2]], double* [[TMP137]], align 8 ; CHECK-NEXT: [[INC_US_2]] = add nuw nsw i32 [[K_013_US_2]], 1 ; CHECK-NEXT: [[CMP2_US_2:%.*]] = icmp ult i32 [[INC_US_2]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_2]], label [[FOR_BODY4_US_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]] +; CHECK-NEXT: br i1 [[CMP2_US_2]], label [[FOR_BODY4_US_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]], !llvm.loop [[LOOP10]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2: -; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[CONV6]], 45 -; CHECK-NEXT: [[TMP22:%.*]] = icmp ult i32 [[I]], 180 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP21]] +; CHECK-NEXT: [[TMP138:%.*]] = add nuw nsw i64 [[CONV6]], 45 +; CHECK-NEXT: [[TMP139:%.*]] = icmp ult i32 [[I]], 180 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP139]]) +; CHECK-NEXT: [[TMP140:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP138]] +; CHECK-NEXT: [[MIN_ITERS_CHECK_3:%.*]] = icmp ult i32 [[I]], 6 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_3]], label [[FOR_BODY4_US_PREHEADER_3:%.*]], label [[VECTOR_MEMCHECK_3:%.*]] +; CHECK: vector.memcheck.3: +; CHECK-NEXT: [[TMP141:%.*]] = add nuw nsw i64 [[CONV6]], 46 +; CHECK-NEXT: [[SCEVGEP25_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP141]] +; CHECK-NEXT: [[TMP142:%.*]] = add nuw nsw i64 [[CONV6]], 45 +; CHECK-NEXT: [[SCEVGEP23_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP142]] +; CHECK-NEXT: [[TMP143:%.*]] = add nuw nsw i64 [[TMP3]], 46 +; CHECK-NEXT: [[SCEVGEP21_3:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], i64 0, i64 [[TMP143]] +; CHECK-NEXT: [[SCEVGEP19_3:%.*]] = getelementptr [225 x double], [225 x double]* [[A]], 
i64 0, i64 45 +; CHECK-NEXT: [[SCEVGEP17_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 [[TMP143]] +; CHECK-NEXT: [[SCEVGEP_3:%.*]] = getelementptr [225 x double], [225 x double]* [[B]], i64 0, i64 45 +; CHECK-NEXT: [[BOUND0_3:%.*]] = icmp ult double* [[SCEVGEP_3]], [[SCEVGEP21_3]] +; CHECK-NEXT: [[BOUND1_3:%.*]] = icmp ult double* [[SCEVGEP19_3]], [[SCEVGEP17_3]] +; CHECK-NEXT: [[FOUND_CONFLICT_3:%.*]] = and i1 [[BOUND0_3]], [[BOUND1_3]] +; CHECK-NEXT: [[BOUND027_3:%.*]] = icmp ult double* [[SCEVGEP_3]], [[SCEVGEP25_3]] +; CHECK-NEXT: [[BOUND128_3:%.*]] = icmp ult double* [[SCEVGEP23_3]], [[SCEVGEP17_3]] +; CHECK-NEXT: [[FOUND_CONFLICT29_3:%.*]] = and i1 [[BOUND027_3]], [[BOUND128_3]] +; CHECK-NEXT: [[CONFLICT_RDX_3:%.*]] = or i1 [[FOUND_CONFLICT_3]], [[FOUND_CONFLICT29_3]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX_3]], label [[FOR_BODY4_US_PREHEADER_3]], label [[VECTOR_PH_3:%.*]] +; CHECK: vector.ph.3: +; CHECK-NEXT: [[N_VEC_3:%.*]] = and i32 [[I]], 252 +; CHECK-NEXT: [[TMP144:%.*]] = load double, double* [[TMP140]], align 8, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT31_3:%.*]] = insertelement <2 x double> poison, double [[TMP144]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT32_3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT31_3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY_3:%.*]] +; CHECK: vector.body.3: +; CHECK-NEXT: [[INDEX_3:%.*]] = phi i32 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ] +; CHECK-NEXT: [[TMP145:%.*]] = or i32 [[INDEX_3]], 1 +; CHECK-NEXT: [[TMP146:%.*]] = or i32 [[INDEX_3]], 2 +; CHECK-NEXT: [[TMP147:%.*]] = or i32 [[INDEX_3]], 3 +; CHECK-NEXT: [[TMP148:%.*]] = zext i32 [[INDEX_3]] to i64 +; CHECK-NEXT: [[TMP149:%.*]] = zext i32 [[TMP145]] to i64 +; CHECK-NEXT: [[TMP150:%.*]] = zext i32 [[TMP146]] to i64 +; CHECK-NEXT: [[TMP151:%.*]] = zext i32 [[TMP147]] to i64 +; CHECK-NEXT: [[TMP152:%.*]] = add nuw nsw i64 [[TMP148]], 45 +; CHECK-NEXT: 
[[TMP153:%.*]] = add nuw nsw i64 [[TMP149]], 45 +; CHECK-NEXT: [[TMP154:%.*]] = insertelement <2 x i64> poison, i64 [[TMP152]], i64 0 +; CHECK-NEXT: [[TMP155:%.*]] = insertelement <2 x i64> [[TMP154]], i64 [[TMP153]], i64 1 +; CHECK-NEXT: [[TMP156:%.*]] = add nuw nsw i64 [[TMP150]], 45 +; CHECK-NEXT: [[TMP157:%.*]] = add nuw nsw i64 [[TMP151]], 45 +; CHECK-NEXT: [[TMP158:%.*]] = insertelement <2 x i64> poison, i64 [[TMP156]], i64 0 +; CHECK-NEXT: [[TMP159:%.*]] = insertelement <2 x i64> [[TMP158]], i64 [[TMP157]], i64 1 +; CHECK-NEXT: [[TMP160:%.*]] = icmp ult <2 x i64> [[TMP155]], +; CHECK-NEXT: [[TMP161:%.*]] = icmp ult <2 x i64> [[TMP159]], +; CHECK-NEXT: [[TMP162:%.*]] = extractelement <2 x i1> [[TMP160]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP162]]) +; CHECK-NEXT: [[TMP163:%.*]] = extractelement <2 x i1> [[TMP160]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP163]]) +; CHECK-NEXT: [[TMP164:%.*]] = extractelement <2 x i1> [[TMP161]], i64 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP164]]) +; CHECK-NEXT: [[TMP165:%.*]] = extractelement <2 x i1> [[TMP161]], i64 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP165]]) +; CHECK-NEXT: [[TMP166:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP152]] +; CHECK-NEXT: [[TMP167:%.*]] = bitcast double* [[TMP166]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, <2 x double>* [[TMP167]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP168:%.*]] = getelementptr inbounds double, double* [[TMP166]], i64 2 +; CHECK-NEXT: [[TMP169:%.*]] = bitcast double* [[TMP168]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD30_3:%.*]] = load <2 x double>, <2 x double>* [[TMP169]], align 8, !alias.scope !3 +; CHECK-NEXT: [[TMP170:%.*]] = fmul <2 x double> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT32_3]] +; CHECK-NEXT: [[TMP171:%.*]] = fmul <2 x double> [[WIDE_LOAD30_3]], [[BROADCAST_SPLAT32_3]] +; CHECK-NEXT: [[TMP172:%.*]] = getelementptr inbounds <225 
x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP152]] +; CHECK-NEXT: [[TMP173:%.*]] = bitcast double* [[TMP172]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD33_3:%.*]] = load <2 x double>, <2 x double>* [[TMP173]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP174:%.*]] = getelementptr inbounds double, double* [[TMP172]], i64 2 +; CHECK-NEXT: [[TMP175:%.*]] = bitcast double* [[TMP174]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD34_3:%.*]] = load <2 x double>, <2 x double>* [[TMP175]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP176:%.*]] = fsub <2 x double> [[WIDE_LOAD33_3]], [[TMP170]] +; CHECK-NEXT: [[TMP177:%.*]] = fsub <2 x double> [[WIDE_LOAD34_3]], [[TMP171]] +; CHECK-NEXT: [[TMP178:%.*]] = bitcast double* [[TMP172]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP176]], <2 x double>* [[TMP178]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP179:%.*]] = bitcast double* [[TMP174]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP177]], <2 x double>* [[TMP179]], align 8, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT_3]] = add nuw i32 [[INDEX_3]], 4 +; CHECK-NEXT: [[TMP180:%.*]] = icmp eq i32 [[INDEX_NEXT_3]], [[N_VEC_3]] +; CHECK-NEXT: br i1 [[TMP180]], label [[MIDDLE_BLOCK_3:%.*]], label [[VECTOR_BODY_3]], !llvm.loop [[LOOP8]] +; CHECK: middle.block.3: +; CHECK-NEXT: [[CMP_N_3:%.*]] = icmp eq i32 [[N_VEC_3]], [[I]] +; CHECK-NEXT: br i1 [[CMP_N_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_PREHEADER_3]] +; CHECK: for.body4.us.preheader.3: +; CHECK-NEXT: [[K_013_US_PH_3:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK_3]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[N_VEC_3]], [[MIDDLE_BLOCK_3]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]] ; CHECK: for.body4.us.3: -; CHECK-NEXT: [[K_013_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ] -; CHECK-NEXT: [[NARROW18:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45 -; 
CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[NARROW18]] to i64 -; CHECK-NEXT: [[TMP25:%.*]] = icmp ult i32 [[K_013_US_3]], 180 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP25]]) -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP24]] -; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, double* [[TMP26]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_3:%.*]] = load double, double* [[TMP23]], align 8 +; CHECK-NEXT: [[K_013_US_3:%.*]] = phi i32 [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ], [ [[K_013_US_PH_3]], [[FOR_BODY4_US_PREHEADER_3]] ] +; CHECK-NEXT: [[NARROW36:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45 +; CHECK-NEXT: [[TMP181:%.*]] = zext i32 [[NARROW36]] to i64 +; CHECK-NEXT: [[TMP182:%.*]] = icmp ult i32 [[K_013_US_3]], 180 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP182]]) +; CHECK-NEXT: [[TMP183:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP181]] +; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, double* [[TMP183]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US_3:%.*]] = load double, double* [[TMP140]], align 8 ; CHECK-NEXT: [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP24]] -; CHECK-NEXT: [[MATRIXEXT11_US_3:%.*]] = load double, double* [[TMP27]], align 8 +; CHECK-NEXT: [[TMP184:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP181]] +; CHECK-NEXT: [[MATRIXEXT11_US_3:%.*]] = load double, double* [[TMP184]], align 8 ; CHECK-NEXT: [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]] -; CHECK-NEXT: store double [[SUB_US_3]], double* [[TMP27]], align 8 +; CHECK-NEXT: store double [[SUB_US_3]], double* [[TMP184]], align 8 ; CHECK-NEXT: [[INC_US_3]] = add nuw nsw i32 [[K_013_US_3]], 1 ; CHECK-NEXT: [[CMP2_US_3:%.*]] = icmp ult i32 [[INC_US_3]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_3]], 
label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[CMP2_US_3]], label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP10]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll +++ llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll @@ -12,19 +12,53 @@ ; CHECK-NEXT: [[DATA:%.*]] = alloca [2 x i8], align 2 ; CHECK-NEXT: store i16 [[TMP0:%.*]], ptr [[DATA]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[X:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 1, [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt ptr [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[TMP4]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DATA]], i64 [[X]] +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 2, [[X]] +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 [[TMP5]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UGLYGEP1]], [[UGLYGEP]] +; CHECK-NEXT: [[OR_COND7:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[BOUND1]] +; CHECK-NEXT: br i1 [[OR_COND7]], label [[BB6_I_I_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[X]], -4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = sub nsw i64 0, [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[TMP6]] +; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 2, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP9]], align 2, !alias.scope !3 +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD3]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: store <4 x i8> [[REVERSE]], ptr [[TMP7]], align 2, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: store <4 x i8> [[REVERSE4]], ptr [[TMP9]], align 2, !alias.scope !3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[X]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[BB6_I_I_PREHEADER]] +; CHECK: bb6.i.i.preheader: +; CHECK-NEXT: [[ITER_SROA_0_07_I_I_PH:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[BB6_I_I:%.*]] ; CHECK: bb6.i.i: -; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ] +; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP11:%.*]], [[BB6_I_I]] ], [ [[ITER_SROA_0_07_I_I_PH]], [[BB6_I_I_PREHEADER]] ] ; CHECK-NEXT: [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]] -; CHECK-NEXT: [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1 +; CHECK-NEXT: [[TMP11]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1 ; CHECK-NEXT: [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]] ; CHECK-NEXT: [[_39_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[_40_I_I]] ; CHECK-NEXT: [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1 ; CHECK-NEXT: 
[[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1 ; CHECK-NEXT: store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1 ; CHECK-NEXT: store i8 [[TMP_0_COPYLOAD_I_I_I_I]], ptr [[_39_I_I]], align 1 -; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP2]], [[X:%.*]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[EXIT:%.*]], label [[BB6_I_I]] +; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP11]], [[X]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[EXIT]], label [[BB6_I_I]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[DOTSROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[DATA]], align 2 ; CHECK-NEXT: ret i16 [[DOTSROA_0_0_COPYLOAD]]