Index: llvm/lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -2071,6 +2071,13 @@ if (Pred->succ_size() > 2) continue; + // If we can't actually analyze the branch, we needn't move it to the top of + // a loop. + MachineBasicBlock *TBB, *FBB; + SmallVector<MachineOperand, 4> Cond; + if (TII->analyzeBranch(*Pred, TBB, FBB, Cond, false)) + continue; + MachineBasicBlock *OtherBB = nullptr; if (Pred->succ_size() == 2) { OtherBB = *Pred->succ_begin(); Index: llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll =================================================================== --- llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll +++ llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll @@ -9,15 +9,14 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r2, 0(%r3) -; CHECK-NEXT: j .LBB0_2 -; CHECK-NEXT: .LBB0_1: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB0_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB0_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB0_1 %res = atomicrmw min i32 *%src, i32 %b seq_cst ret i32 %res @@ -28,15 +27,14 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r2, 0(%r3) -; CHECK-NEXT: j .LBB1_2 -; CHECK-NEXT: .LBB1_1: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjhe %r2, 
%r4, .LBB1_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: crjhe %r2, %r4, .LBB1_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB1_1 %res = atomicrmw max i32 *%src, i32 %b seq_cst ret i32 %res @@ -47,15 +45,14 @@ ; CHECK-LABEL: f3: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r2, 0(%r3) -; CHECK-NEXT: j .LBB2_2 -; CHECK-NEXT: .LBB2_1: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB2_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: clrjle %r2, %r4, .LBB2_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: clrjle %r2, %r4, .LBB2_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB2_3: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB2_1 %res = atomicrmw umin i32 *%src, i32 %b seq_cst ret i32 %res @@ -66,15 +63,14 @@ ; CHECK-LABEL: f4: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r2, 0(%r3) -; CHECK-NEXT: j .LBB3_2 -; CHECK-NEXT: .LBB3_1: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB3_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: clrjhe %r2, %r4, .LBB3_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: clrjhe %r2, %r4, .LBB3_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB3_3: # in Loop: Header=BB3_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB3_1 %res = atomicrmw umax i32 *%src, i32 %b seq_cst ret i32 %res @@ -85,15 +81,14 @@ ; 
CHECK-LABEL: f5: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r2, 4092(%r3) -; CHECK-NEXT: j .LBB4_2 -; CHECK-NEXT: .LBB4_1: # in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 4092(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB4_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB4_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB4_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB4_3: # in Loop: Header=BB4_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 4092(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB4_1 %ptr = getelementptr i32, i32 *%src, i64 1023 %res = atomicrmw min i32 *%ptr, i32 %b seq_cst @@ -105,15 +100,14 @@ ; CHECK-LABEL: f6: ; CHECK: # %bb.0: ; CHECK-NEXT: ly %r2, 4096(%r3) -; CHECK-NEXT: j .LBB5_2 -; CHECK-NEXT: .LBB5_1: # in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: csy %r2, %r0, 4096(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB5_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB5_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB5_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB5_3: # in Loop: Header=BB5_1 Depth=1 +; CHECK-NEXT: csy %r2, %r0, 4096(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB5_1 %ptr = getelementptr i32, i32 *%src, i64 1024 %res = atomicrmw min i32 *%ptr, i32 %b seq_cst @@ -125,15 +119,14 @@ ; CHECK-LABEL: f7: ; CHECK: # %bb.0: ; CHECK-NEXT: ly %r2, 524284(%r3) -; CHECK-NEXT: j .LBB6_2 -; CHECK-NEXT: .LBB6_1: # in Loop: Header=BB6_2 Depth=1 -; CHECK-NEXT: csy %r2, %r0, 524284(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; 
CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB6_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB6_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB6_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB6_3: # in Loop: Header=BB6_1 Depth=1 +; CHECK-NEXT: csy %r2, %r0, 524284(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB6_1 %ptr = getelementptr i32, i32 *%src, i64 131071 %res = atomicrmw min i32 *%ptr, i32 %b seq_cst @@ -146,15 +139,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: agfi %r3, 524288 ; CHECK-NEXT: l %r2, 0(%r3) -; CHECK-NEXT: j .LBB7_2 -; CHECK-NEXT: .LBB7_1: # in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB7_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB7_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB7_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB7_3: # in Loop: Header=BB7_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB7_1 %ptr = getelementptr i32, i32 *%src, i64 131072 %res = atomicrmw min i32 *%ptr, i32 %b seq_cst @@ -166,15 +158,14 @@ ; CHECK-LABEL: f9: ; CHECK: # %bb.0: ; CHECK-NEXT: ly %r2, -4(%r3) -; CHECK-NEXT: j .LBB8_2 -; CHECK-NEXT: .LBB8_1: # in Loop: Header=BB8_2 Depth=1 -; CHECK-NEXT: csy %r2, %r0, -4(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB8_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB8_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB8_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB8_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB8_3: # in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: csy %r2, %r0, 
-4(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB8_1 %ptr = getelementptr i32, i32 *%src, i64 -1 %res = atomicrmw min i32 *%ptr, i32 %b seq_cst @@ -186,15 +177,14 @@ ; CHECK-LABEL: f10: ; CHECK: # %bb.0: ; CHECK-NEXT: ly %r2, -524288(%r3) -; CHECK-NEXT: j .LBB9_2 -; CHECK-NEXT: .LBB9_1: # in Loop: Header=BB9_2 Depth=1 -; CHECK-NEXT: csy %r2, %r0, -524288(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB9_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB9_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB9_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB9_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1 +; CHECK-NEXT: csy %r2, %r0, -524288(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB9_1 %ptr = getelementptr i32, i32 *%src, i64 -131072 %res = atomicrmw min i32 *%ptr, i32 %b seq_cst @@ -207,15 +197,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: agfi %r3, -524292 ; CHECK-NEXT: l %r2, 0(%r3) -; CHECK-NEXT: j .LBB10_2 -; CHECK-NEXT: .LBB10_1: # in Loop: Header=BB10_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB10_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r4, .LBB10_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB10_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r4, .LBB10_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB10_1 %ptr = getelementptr i32, i32 *%src, i64 -131073 %res = atomicrmw min i32 *%ptr, i32 %b seq_cst @@ -228,15 +217,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: agr %r3, %r4 ; CHECK-NEXT: l %r2, 0(%r3) -; CHECK-NEXT: j .LBB11_2 -; CHECK-NEXT: .LBB11_1: # in Loop: 
Header=BB11_2 Depth=1 -; CHECK-NEXT: cs %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB11_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r0, %r2 -; CHECK-NEXT: crjle %r2, %r5, .LBB11_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB11_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r5, .LBB11_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ; CHECK-NEXT: lr %r0, %r5 +; CHECK-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB11_1 %add = add i64 %base, %index %ptr = inttoptr i64 %add to i32 * @@ -250,15 +238,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: l %r2, 0(%r3) ; CHECK-NEXT: lhi %r0, 42 -; CHECK-NEXT: j .LBB12_2 -; CHECK-NEXT: .LBB12_1: # in Loop: Header=BB12_2 Depth=1 -; CHECK-NEXT: cs %r2, %r1, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB12_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lr %r1, %r2 -; CHECK-NEXT: crjle %r2, %r0, .LBB12_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB12_2 Depth=1 +; CHECK-NEXT: crjle %r2, %r0, .LBB12_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ; CHECK-NEXT: lhi %r1, 42 +; CHECK-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; CHECK-NEXT: cs %r2, %r1, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB12_1 %res = atomicrmw min i32 *%ptr, i32 42 seq_cst ret i32 %res Index: llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll =================================================================== --- llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll +++ llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll @@ -9,15 +9,14 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: lg %r2, 0(%r3) -; CHECK-NEXT: j .LBB0_2 -; CHECK-NEXT: .LBB0_1: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB0_1: 
# =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: cgrjle %r2, %r4, .LBB0_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB0_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB0_1 %res = atomicrmw min i64 *%src, i64 %b seq_cst ret i64 %res @@ -28,15 +27,14 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: lg %r2, 0(%r3) -; CHECK-NEXT: j .LBB1_2 -; CHECK-NEXT: .LBB1_1: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: cgrjhe %r2, %r4, .LBB1_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: cgrjhe %r2, %r4, .LBB1_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB1_1 %res = atomicrmw max i64 *%src, i64 %b seq_cst ret i64 %res @@ -47,15 +45,14 @@ ; CHECK-LABEL: f3: ; CHECK: # %bb.0: ; CHECK-NEXT: lg %r2, 0(%r3) -; CHECK-NEXT: j .LBB2_2 -; CHECK-NEXT: .LBB2_1: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB2_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: clgrjle %r2, %r4, .LBB2_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: clgrjle %r2, %r4, .LBB2_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: .LBB2_3: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: 
j .LBB2_1 %res = atomicrmw umin i64 *%src, i64 %b seq_cst ret i64 %res @@ -66,15 +63,14 @@ ; CHECK-LABEL: f4: ; CHECK: # %bb.0: ; CHECK-NEXT: lg %r2, 0(%r3) -; CHECK-NEXT: j .LBB3_2 -; CHECK-NEXT: .LBB3_1: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB3_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: clgrjhe %r2, %r4, .LBB3_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: clgrjhe %r2, %r4, .LBB3_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: .LBB3_3: # in Loop: Header=BB3_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB3_1 %res = atomicrmw umax i64 *%src, i64 %b seq_cst ret i64 %res @@ -85,15 +81,14 @@ ; CHECK-LABEL: f5: ; CHECK: # %bb.0: ; CHECK-NEXT: lg %r2, 524280(%r3) -; CHECK-NEXT: j .LBB4_2 -; CHECK-NEXT: .LBB4_1: # in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 524280(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB4_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: cgrjle %r2, %r4, .LBB4_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB4_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: .LBB4_3: # in Loop: Header=BB4_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 524280(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB4_1 %ptr = getelementptr i64, i64 *%src, i64 65535 %res = atomicrmw min i64 *%ptr, i64 %b seq_cst @@ -106,15 +101,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: agfi %r3, 524288 ; CHECK-NEXT: lg %r2, 0(%r3) -; CHECK-NEXT: j .LBB5_2 -; CHECK-NEXT: .LBB5_1: # in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB5_2: # =>This Inner Loop 
Header: Depth=1 +; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: cgrjle %r2, %r4, .LBB5_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB5_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: .LBB5_3: # in Loop: Header=BB5_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB5_1 %ptr = getelementptr i64, i64 *%src, i64 65536 %res = atomicrmw min i64 *%ptr, i64 %b seq_cst @@ -126,15 +120,14 @@ ; CHECK-LABEL: f7: ; CHECK: # %bb.0: ; CHECK-NEXT: lg %r2, -524288(%r3) -; CHECK-NEXT: j .LBB6_2 -; CHECK-NEXT: .LBB6_1: # in Loop: Header=BB6_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, -524288(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: cgrjle %r2, %r4, .LBB6_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB6_2 Depth=1 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB6_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: .LBB6_3: # in Loop: Header=BB6_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, -524288(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB6_1 %ptr = getelementptr i64, i64 *%src, i64 -65536 %res = atomicrmw min i64 *%ptr, i64 %b seq_cst @@ -147,15 +140,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: agfi %r3, -524296 ; CHECK-NEXT: lg %r2, 0(%r3) -; CHECK-NEXT: j .LBB7_2 -; CHECK-NEXT: .LBB7_1: # in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB7_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: cgrjle %r2, %r4, .LBB7_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB7_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ; CHECK-NEXT: 
lgr %r0, %r4 +; CHECK-NEXT: .LBB7_3: # in Loop: Header=BB7_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB7_1 %ptr = getelementptr i64, i64 *%src, i64 -65537 %res = atomicrmw min i64 *%ptr, i64 %b seq_cst @@ -168,15 +160,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: agr %r3, %r4 ; CHECK-NEXT: lg %r2, 0(%r3) -; CHECK-NEXT: j .LBB8_2 -; CHECK-NEXT: .LBB8_1: # in Loop: Header=BB8_2 Depth=1 -; CHECK-NEXT: csg %r2, %r0, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB8_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r0, %r2 -; CHECK-NEXT: cgrjle %r2, %r5, .LBB8_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB8_2 Depth=1 +; CHECK-NEXT: cgrjle %r2, %r5, .LBB8_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ; CHECK-NEXT: lgr %r0, %r5 +; CHECK-NEXT: .LBB8_3: # in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB8_1 %add = add i64 %base, %index %ptr = inttoptr i64 %add to i64 * @@ -190,15 +181,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lg %r2, 0(%r3) ; CHECK-NEXT: lghi %r0, 42 -; CHECK-NEXT: j .LBB9_2 -; CHECK-NEXT: .LBB9_1: # in Loop: Header=BB9_2 Depth=1 -; CHECK-NEXT: csg %r2, %r1, 0(%r3) -; CHECK-NEXT: ber %r14 -; CHECK-NEXT: .LBB9_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lgr %r1, %r2 -; CHECK-NEXT: cgrjle %r2, %r0, .LBB9_1 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB9_2 Depth=1 +; CHECK-NEXT: cgrjle %r2, %r0, .LBB9_3 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ; CHECK-NEXT: lghi %r1, 42 +; CHECK-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1 +; CHECK-NEXT: csg %r2, %r1, 0(%r3) +; CHECK-NEXT: ber %r14 ; CHECK-NEXT: j .LBB9_1 %res = atomicrmw min i64 *%ptr, i64 42 seq_cst ret i64 %res Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll =================================================================== --- 
llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -328,35 +328,9 @@ ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vdup.32 q2, r12 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill -; CHECK-NEXT: b .LBB2_4 -; CHECK-NEXT: .LBB2_2: @ %cond.load25 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmovx.f16 s0, s28 -; CHECK-NEXT: vmov r4, s28 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q6[0], r4 -; CHECK-NEXT: vldr.16 s0, [r1, #6] -; CHECK-NEXT: vmov.16 q6[1], r2 -; CHECK-NEXT: vmov r2, s29 -; CHECK-NEXT: vmov.16 q6[2], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q6[3], r2 -; CHECK-NEXT: .LBB2_3: @ %else26 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmul.f16 q0, q6, q5 -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: vcvtt.f32.f16 s23, s1 -; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: vcvtb.f32.f16 s22, s1 -; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: vcvtt.f32.f16 s21, s0 -; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: vcvtb.f32.f16 s20, s0 -; CHECK-NEXT: vadd.f32 q5, q3, q5 -; CHECK-NEXT: bne .LBB2_4 -; CHECK-NEXT: b .LBB2_21 -; CHECK-NEXT: .LBB2_4: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload ; CHECK-NEXT: vmov q3, q5 @@ -378,13 +352,13 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: bfi r2, r4, #3, #1 ; CHECK-NEXT: lsls r4, r2, #31 -; CHECK-NEXT: bne .LBB2_9 -; CHECK-NEXT: @ %bb.5: @ %else -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bne .LBB2_7 +; CHECK-NEXT: @ %bb.3: @ %else +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bpl .LBB2_10 -; CHECK-NEXT: .LBB2_6: @ %cond.load6 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bpl .LBB2_8 +; CHECK-NEXT: .LBB2_4: @ %cond.load6 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: 
vldr.16 s20, [r0, #2] ; CHECK-NEXT: vmov r5, s24 ; CHECK-NEXT: vmovx.f16 s24, s25 @@ -396,25 +370,25 @@ ; CHECK-NEXT: vmov r4, s24 ; CHECK-NEXT: vmov.16 q5[3], r4 ; CHECK-NEXT: lsls r4, r2, #29 -; CHECK-NEXT: bmi .LBB2_11 -; CHECK-NEXT: .LBB2_7: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bmi .LBB2_9 +; CHECK-NEXT: .LBB2_5: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmov q6, q5 ; CHECK-NEXT: lsls r2, r2, #28 -; CHECK-NEXT: bmi .LBB2_12 -; CHECK-NEXT: .LBB2_8: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bmi .LBB2_10 +; CHECK-NEXT: .LBB2_6: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmov q5, q6 -; CHECK-NEXT: b .LBB2_13 -; CHECK-NEXT: .LBB2_9: @ %cond.load -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: b .LBB2_11 +; CHECK-NEXT: .LBB2_7: @ %cond.load +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vldr.16 s24, [r0] ; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bmi .LBB2_6 -; CHECK-NEXT: .LBB2_10: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bmi .LBB2_4 +; CHECK-NEXT: .LBB2_8: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmov q5, q6 ; CHECK-NEXT: lsls r4, r2, #29 -; CHECK-NEXT: bpl .LBB2_7 -; CHECK-NEXT: .LBB2_11: @ %cond.load9 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bpl .LBB2_5 +; CHECK-NEXT: .LBB2_9: @ %cond.load9 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmovx.f16 s24, s20 ; CHECK-NEXT: vmov r4, s20 ; CHECK-NEXT: vldr.16 s28, [r0, #4] @@ -427,9 +401,9 @@ ; CHECK-NEXT: vmov r4, s20 ; CHECK-NEXT: vmov.16 q6[3], r4 ; CHECK-NEXT: lsls r2, r2, #28 -; CHECK-NEXT: bpl .LBB2_8 -; CHECK-NEXT: .LBB2_12: @ %cond.load12 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bpl .LBB2_6 +; CHECK-NEXT: .LBB2_10: @ %cond.load12 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmovx.f16 s20, s24 ; CHECK-NEXT: vmov r4, s24 ; CHECK-NEXT: vmov r2, s20 @@ -440,8 +414,8 @@ ; CHECK-NEXT: vmov.16 q5[2], r2 ; CHECK-NEXT: vmov r2, s24 ; CHECK-NEXT: vmov.16 q5[3], r2 -; CHECK-NEXT: 
.LBB2_13: @ %else13 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: .LBB2_11: @ %else13 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vcmp.u32 cs, q2, q4 ; CHECK-NEXT: @ implicit-def: $q7 ; CHECK-NEXT: vmrs r4, p0 @@ -459,13 +433,13 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: bfi r2, r4, #3, #1 ; CHECK-NEXT: lsls r4, r2, #31 -; CHECK-NEXT: bne .LBB2_17 -; CHECK-NEXT: @ %bb.14: @ %else17 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bne .LBB2_16 +; CHECK-NEXT: @ %bb.12: @ %else17 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bpl .LBB2_18 -; CHECK-NEXT: .LBB2_15: @ %cond.load19 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bpl .LBB2_17 +; CHECK-NEXT: .LBB2_13: @ %cond.load19 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vldr.16 s24, [r1, #2] ; CHECK-NEXT: vmov r5, s28 ; CHECK-NEXT: vmovx.f16 s28, s29 @@ -477,23 +451,25 @@ ; CHECK-NEXT: vmov r4, s28 ; CHECK-NEXT: vmov.16 q6[3], r4 ; CHECK-NEXT: lsls r4, r2, #29 -; CHECK-NEXT: bmi .LBB2_19 -; CHECK-NEXT: .LBB2_16: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bmi .LBB2_18 +; CHECK-NEXT: .LBB2_14: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmov q7, q6 ; CHECK-NEXT: lsls r2, r2, #28 -; CHECK-NEXT: bmi.w .LBB2_2 +; CHECK-NEXT: bmi .LBB2_19 +; CHECK-NEXT: .LBB2_15: @ in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: vmov q6, q7 ; CHECK-NEXT: b .LBB2_20 -; CHECK-NEXT: .LBB2_17: @ %cond.load16 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: .LBB2_16: @ %cond.load16 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vldr.16 s28, [r1] ; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bmi .LBB2_15 -; CHECK-NEXT: .LBB2_18: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bmi .LBB2_13 +; CHECK-NEXT: .LBB2_17: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmov q6, q7 ; CHECK-NEXT: lsls r4, r2, #29 -; CHECK-NEXT: bpl .LBB2_16 -; CHECK-NEXT: .LBB2_19: @ %cond.load22 -; CHECK-NEXT: @ in Loop: 
Header=BB2_4 Depth=1 +; CHECK-NEXT: bpl .LBB2_14 +; CHECK-NEXT: .LBB2_18: @ %cond.load22 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vmovx.f16 s28, s24 ; CHECK-NEXT: vmov r4, s24 ; CHECK-NEXT: vldr.16 s0, [r1, #4] @@ -506,11 +482,32 @@ ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vmov.16 q7[3], r4 ; CHECK-NEXT: lsls r2, r2, #28 -; CHECK-NEXT: bmi.w .LBB2_2 -; CHECK-NEXT: .LBB2_20: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmov q6, q7 -; CHECK-NEXT: b .LBB2_3 -; CHECK-NEXT: .LBB2_21: @ %middle.block +; CHECK-NEXT: bpl .LBB2_15 +; CHECK-NEXT: .LBB2_19: @ %cond.load25 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: vmovx.f16 s0, s28 +; CHECK-NEXT: vmov r4, s28 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov.16 q6[0], r4 +; CHECK-NEXT: vldr.16 s0, [r1, #6] +; CHECK-NEXT: vmov.16 q6[1], r2 +; CHECK-NEXT: vmov r2, s29 +; CHECK-NEXT: vmov.16 q6[2], r2 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov.16 q6[3], r2 +; CHECK-NEXT: .LBB2_20: @ %else26 +; CHECK-NEXT: @ in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: vmul.f16 q0, q6, q5 +; CHECK-NEXT: adds r0, #8 +; CHECK-NEXT: vcvtt.f32.f16 s23, s1 +; CHECK-NEXT: adds r1, #8 +; CHECK-NEXT: vcvtb.f32.f16 s22, s1 +; CHECK-NEXT: adds r3, #4 +; CHECK-NEXT: vcvtt.f32.f16 s21, s0 +; CHECK-NEXT: vcvtb.f32.f16 s20, s0 +; CHECK-NEXT: vadd.f32 q5, q3, q5 +; CHECK-NEXT: le lr, .LBB2_2 +; CHECK-NEXT: @ %bb.21: @ %middle.block ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: vcmp.u32 cs, q0, q4 ; CHECK-NEXT: vpsel q0, q5, q3