diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1268,10 +1268,18 @@ return false; } - // Rematerializable instructions should always be hoisted since the register - // allocator can just pull them down again when needed. - if (TII->isTriviallyReMaterializable(MI, AA)) - return true; + // Remat instructions inside the current working loop are always hoisted. + // Remat instructions that would be hoisted to an outer parent loop are only + // hoisted when they are not cheap (or HoistCheapInsts is set), because RA + // cannot pull every remat instruction down into the inner loop: it first + // tries to split them in the outer loop. + if (TII->isTriviallyReMaterializable(MI, AA)) { + if ((MLI->getLoopFor(MI.getParent()) == CurLoop) || + !(CheapInstr && !HoistCheapInsts)) + return true; + else + return false; + } // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? 
diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll --- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll @@ -141,8 +141,6 @@ ; SI-NEXT: ; %bb.1: ; %outer_loop.preheader ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: BB3_2: ; %outer_loop ; SI-NEXT: ; =>This Loop Header: Depth=1 ; SI-NEXT: ; Child Loop BB3_3 Depth 2 @@ -151,6 +149,8 @@ ; SI-NEXT: ; Parent Loop BB3_2 Depth=1 ; SI-NEXT: ; => This Inner Loop Header: Depth=2 ; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1] +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -48,9 +48,9 @@ ; GCN-NEXT: s_endpgm ; IR-LABEL: @reduced_nested_loop_conditions( ; IR-NEXT: bb: -; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() [[ATTR4:#.*]] ; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* [[ARG:%.*]], i32 [[MY_TMP]] -; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]] +; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]], align 4 ; IR-NEXT: br label [[BB5:%.*]] ; IR: bb3: ; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]] @@ -84,7 +84,7 @@ ; IR: bb16: ; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1 ; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 [[MY_TMP17]] -; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]] +; 
IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]], align 4 ; IR-NEXT: br label [[BB20]] ; IR: bb20: ; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ] @@ -155,6 +155,8 @@ ; GCN-NEXT: s_cbranch_vccnz BB1_6 ; GCN-NEXT: ; %bb.1: ; %bb14.lr.ph ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_branch BB1_3 ; GCN-NEXT: BB1_2: ; in Loop: Header=BB1_3 Depth=1 ; GCN-NEXT: s_mov_b64 s[0:1], -1 @@ -170,7 +172,7 @@ ; GCN-NEXT: BB1_4: ; %bb18 ; GCN-NEXT: ; Parent Loop BB1_3 Depth=1 ; GCN-NEXT: ; => This Inner Loop Header: Depth=2 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0 ; GCN-NEXT: s_and_b64 vcc, exec, vcc @@ -189,19 +191,19 @@ ; GCN-NEXT: s_endpgm ; IR-LABEL: @nested_loop_conditions( ; IR-NEXT: bb: -; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() [[ATTR4]] ; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64 ; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG:%.*]], i64 [[MY_TMP1]] ; IR-NEXT: [[MY_TMP3:%.*]] = load i64, i64 addrspace(1)* [[MY_TMP2]], align 16 ; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 ; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0 -; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9 ; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]] ; IR: bb14.lr.ph: ; IR-NEXT: br label [[BB14:%.*]] ; IR: Flow3: -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP21:%.*]]) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 
[[TMP20:%.*]]) ; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]]) ; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 ; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 @@ -235,15 +237,15 @@ ; IR: Flow1: ; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ] ; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ] -; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP18:%.*]], [[BB21]] ], [ true, [[BB14]] ] -; IR-NEXT: [[TMP14]] = phi i1 [ [[TMP18]], [[BB21]] ], [ false, [[BB14]] ] +; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12_INV:%.*]], [[BB21]] ], [ true, [[BB14]] ] +; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12_INV]], [[BB21]] ], [ false, [[BB14]] ] ; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ] ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]]) ; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]]) ; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]]) ; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]] ; IR: bb18: -; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9 ; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]] ; IR: bb21: @@ -260,21 +262,21 @@ ; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 [[MY_TMP7]] ; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16 ; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0 -; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP12:%.*]] = icmp slt i32 [[MY_TMP11]], 9 -; IR-NEXT: [[TMP18]] = xor i1 [[MY_TMP12]], true +; IR-NEXT: 
[[MY_TMP12_INV]] = xor i1 [[MY_TMP12]], true ; IR-NEXT: br label [[FLOW1]] ; IR: Flow2: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]]) -; IR-NEXT: [[TMP19:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]]) -; IR-NEXT: [[TMP20:%.*]] = extractvalue { i1, i64 } [[TMP19]], 0 -; IR-NEXT: [[TMP21]] = extractvalue { i1, i64 } [[TMP19]], 1 -; IR-NEXT: br i1 [[TMP20]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]] +; IR-NEXT: [[TMP18:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]]) +; IR-NEXT: [[TMP19:%.*]] = extractvalue { i1, i64 } [[TMP18]], 0 +; IR-NEXT: [[TMP20]] = extractvalue { i1, i64 } [[TMP18]], 1 +; IR-NEXT: br i1 [[TMP19]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]] ; IR: bb31.loopexit: ; IR-NEXT: br label [[FLOW3]] ; IR: bb31: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]]) -; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef +; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef, align 4 ; IR-NEXT: ret void bb: %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -162,12 +162,12 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind { ; SI-LABEL: loop_land_info_assert: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SI-NEXT: s_load_dword s8, s[0:1], 0xc -; SI-NEXT: s_brev_b32 s9, 44 +; SI-NEXT: s_load_dword s6, s[0:1], 0xc +; SI-NEXT: s_brev_b32 s7, 44 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1 ; SI-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4 @@ -176,52 +176,54 @@ ; SI-NEXT: 
s_and_b64 s[0:1], exec, s[4:5] ; SI-NEXT: s_and_b64 s[2:3], exec, s[2:3] ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9 +; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s7 ; SI-NEXT: s_and_b64 s[4:5], exec, s[4:5] ; SI-NEXT: v_mov_b32_e32 v0, 3 ; SI-NEXT: s_branch BB3_4 ; SI-NEXT: BB3_1: ; %Flow6 ; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; SI-NEXT: s_mov_b64 s[10:11], 0 +; SI-NEXT: s_mov_b64 s[8:9], 0 ; SI-NEXT: BB3_2: ; %Flow5 ; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; SI-NEXT: s_mov_b64 s[14:15], 0 +; SI-NEXT: s_mov_b64 s[12:13], 0 ; SI-NEXT: BB3_3: ; %Flow ; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; SI-NEXT: s_and_b64 vcc, exec, s[12:13] +; SI-NEXT: s_and_b64 vcc, exec, s[10:11] ; SI-NEXT: s_cbranch_vccnz BB3_8 ; SI-NEXT: BB3_4: ; %while.cond ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 -; SI-NEXT: s_mov_b64 s[14:15], -1 -; SI-NEXT: s_mov_b64 s[10:11], -1 ; SI-NEXT: s_mov_b64 s[12:13], -1 +; SI-NEXT: s_mov_b64 s[8:9], -1 +; SI-NEXT: s_mov_b64 s[10:11], -1 ; SI-NEXT: s_mov_b64 vcc, s[0:1] ; SI-NEXT: s_cbranch_vccz BB3_3 ; SI-NEXT: ; %bb.5: ; %convex.exit ; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1 +; SI-NEXT: s_mov_b64 s[8:9], -1 ; SI-NEXT: s_mov_b64 s[10:11], -1 -; SI-NEXT: s_mov_b64 s[12:13], -1 ; SI-NEXT: s_mov_b64 vcc, s[2:3] ; SI-NEXT: s_cbranch_vccz BB3_2 ; SI-NEXT: ; %bb.6: ; %if.end ; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; SI-NEXT: s_mov_b64 s[12:13], -1 +; SI-NEXT: s_mov_b64 s[10:11], -1 ; SI-NEXT: s_mov_b64 vcc, s[4:5] ; SI-NEXT: s_cbranch_vccz BB3_1 ; SI-NEXT: ; %bb.7: ; %if.else ; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; SI-NEXT: s_mov_b64 s[12:13], 0 -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: s_mov_b64 s[10:11], 0 ; SI-NEXT: s_branch BB3_1 ; SI-NEXT: BB3_8: ; %loop.exit.guard4 ; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; SI-NEXT: s_and_b64 vcc, exec, s[10:11] +; SI-NEXT: 
s_and_b64 vcc, exec, s[8:9] ; SI-NEXT: s_cbranch_vccz BB3_4 ; SI-NEXT: ; %bb.9: ; %loop.exit.guard -; SI-NEXT: s_and_b64 vcc, exec, s[14:15] +; SI-NEXT: s_and_b64 vcc, exec, s[12:13] ; SI-NEXT: s_cbranch_vccz BB3_13 ; SI-NEXT: ; %bb.10: ; %for.cond.preheader -; SI-NEXT: s_cmpk_lt_i32 s8, 0x3e8 +; SI-NEXT: s_cmpk_lt_i32 s6, 0x3e8 ; SI-NEXT: s_cbranch_scc0 BB3_13 ; SI-NEXT: ; %bb.11: ; %for.body ; SI-NEXT: s_and_b64 vcc, exec, 0 @@ -234,12 +236,12 @@ ; ; FLAT-LABEL: loop_land_info_assert: ; FLAT: ; %bb.0: ; %entry -; FLAT-NEXT: s_mov_b32 s7, 0xf000 -; FLAT-NEXT: s_mov_b32 s6, -1 -; FLAT-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; FLAT-NEXT: s_mov_b32 s3, 0xf000 +; FLAT-NEXT: s_mov_b32 s2, -1 +; FLAT-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; FLAT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; FLAT-NEXT: s_load_dword s8, s[0:1], 0x30 -; FLAT-NEXT: s_brev_b32 s9, 44 +; FLAT-NEXT: s_load_dword s6, s[0:1], 0x30 +; FLAT-NEXT: s_brev_b32 s7, 44 ; FLAT-NEXT: s_waitcnt lgkmcnt(0) ; FLAT-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1 ; FLAT-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4 @@ -248,52 +250,54 @@ ; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5] ; FLAT-NEXT: s_and_b64 s[2:3], exec, s[2:3] ; FLAT-NEXT: s_waitcnt vmcnt(0) -; FLAT-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9 +; FLAT-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s7 ; FLAT-NEXT: s_and_b64 s[4:5], exec, s[4:5] ; FLAT-NEXT: v_mov_b32_e32 v0, 3 ; FLAT-NEXT: s_branch BB3_4 ; FLAT-NEXT: BB3_1: ; %Flow6 ; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; FLAT-NEXT: s_mov_b64 s[10:11], 0 +; FLAT-NEXT: s_mov_b64 s[8:9], 0 ; FLAT-NEXT: BB3_2: ; %Flow5 ; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; FLAT-NEXT: s_mov_b64 s[14:15], 0 +; FLAT-NEXT: s_mov_b64 s[12:13], 0 ; FLAT-NEXT: BB3_3: ; %Flow ; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13] +; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11] ; FLAT-NEXT: s_cbranch_vccnz BB3_8 ; FLAT-NEXT: BB3_4: ; %while.cond ; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 -; 
FLAT-NEXT: s_mov_b64 s[14:15], -1 -; FLAT-NEXT: s_mov_b64 s[10:11], -1 ; FLAT-NEXT: s_mov_b64 s[12:13], -1 +; FLAT-NEXT: s_mov_b64 s[8:9], -1 +; FLAT-NEXT: s_mov_b64 s[10:11], -1 ; FLAT-NEXT: s_mov_b64 vcc, s[0:1] ; FLAT-NEXT: s_cbranch_vccz BB3_3 ; FLAT-NEXT: ; %bb.5: ; %convex.exit ; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1 +; FLAT-NEXT: s_mov_b64 s[8:9], -1 ; FLAT-NEXT: s_mov_b64 s[10:11], -1 -; FLAT-NEXT: s_mov_b64 s[12:13], -1 ; FLAT-NEXT: s_mov_b64 vcc, s[2:3] ; FLAT-NEXT: s_cbranch_vccz BB3_2 ; FLAT-NEXT: ; %bb.6: ; %if.end ; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; FLAT-NEXT: s_mov_b64 s[12:13], -1 +; FLAT-NEXT: s_mov_b64 s[10:11], -1 ; FLAT-NEXT: s_mov_b64 vcc, s[4:5] ; FLAT-NEXT: s_cbranch_vccz BB3_1 ; FLAT-NEXT: ; %bb.7: ; %if.else ; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; FLAT-NEXT: s_mov_b64 s[12:13], 0 -; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; FLAT-NEXT: s_mov_b32 s11, 0xf000 +; FLAT-NEXT: s_mov_b32 s10, -1 +; FLAT-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; FLAT-NEXT: s_mov_b64 s[10:11], 0 ; FLAT-NEXT: s_branch BB3_1 ; FLAT-NEXT: BB3_8: ; %loop.exit.guard4 ; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1 -; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11] +; FLAT-NEXT: s_and_b64 vcc, exec, s[8:9] ; FLAT-NEXT: s_cbranch_vccz BB3_4 ; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard -; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15] +; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13] ; FLAT-NEXT: s_cbranch_vccz BB3_13 ; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader -; FLAT-NEXT: s_cmpk_lt_i32 s8, 0x3e8 +; FLAT-NEXT: s_cmpk_lt_i32 s6, 0x3e8 ; FLAT-NEXT: s_cbranch_scc0 BB3_13 ; FLAT-NEXT: ; %bb.11: ; %for.body ; FLAT-NEXT: s_and_b64 vcc, exec, 0 diff --git a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll --- a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll +++ b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll @@ -6,431 
+6,262 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: stdu 1, -720(1) -; CHECK-NEXT: .cfi_def_cfa_offset 720 -; CHECK-NEXT: .cfi_offset r14, -144 -; CHECK-NEXT: .cfi_offset r15, -136 -; CHECK-NEXT: .cfi_offset r16, -128 -; CHECK-NEXT: .cfi_offset r17, -120 -; CHECK-NEXT: .cfi_offset r18, -112 -; CHECK-NEXT: .cfi_offset r19, -104 -; CHECK-NEXT: .cfi_offset r20, -96 -; CHECK-NEXT: .cfi_offset r21, -88 -; CHECK-NEXT: .cfi_offset r22, -80 -; CHECK-NEXT: .cfi_offset r23, -72 -; CHECK-NEXT: .cfi_offset r24, -64 -; CHECK-NEXT: .cfi_offset r25, -56 -; CHECK-NEXT: .cfi_offset r26, -48 -; CHECK-NEXT: .cfi_offset r27, -40 -; CHECK-NEXT: .cfi_offset r28, -32 -; CHECK-NEXT: .cfi_offset r29, -24 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: .cfi_offset r31, -8 -; CHECK-NEXT: .cfi_offset r2, -152 -; CHECK-NEXT: lis 5, 4 -; CHECK-NEXT: std 30, 704(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 696(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 6, 5, 6292 -; CHECK-NEXT: std 28, 688(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 680(1) # 8-byte Folded Spill -; CHECK-NEXT: std 26, 672(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 664(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 5, 5, 6291 -; CHECK-NEXT: std 14, 576(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: std 16, 592(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 600(1) # 8-byte Folded Spill -; CHECK-NEXT: std 18, 608(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 616(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, 624(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 632(1) # 8-byte Folded Spill -; CHECK-NEXT: std 22, 640(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 648(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 656(1) # 8-byte Folded Spill -; CHECK-NEXT: std 31, 712(1) # 8-byte Folded Spill -; CHECK-NEXT: std 2, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: sldi 6, 6, 32 -; CHECK-NEXT: oris 7, 6, 13030 -; 
CHECK-NEXT: oris 8, 6, 13066 -; CHECK-NEXT: oris 9, 6, 13054 -; CHECK-NEXT: oris 10, 6, 13042 -; CHECK-NEXT: oris 11, 6, 13078 -; CHECK-NEXT: oris 12, 6, 13115 -; CHECK-NEXT: oris 0, 6, 13103 -; CHECK-NEXT: oris 30, 6, 13091 -; CHECK-NEXT: oris 29, 6, 13127 -; CHECK-NEXT: oris 28, 6, 13164 -; CHECK-NEXT: oris 27, 6, 13152 -; CHECK-NEXT: oris 26, 6, 13139 -; CHECK-NEXT: oris 25, 6, 13176 -; CHECK-NEXT: ori 7, 7, 3704 -; CHECK-NEXT: ori 8, 8, 44408 -; CHECK-NEXT: ori 9, 9, 30840 -; CHECK-NEXT: ori 10, 10, 17272 -; CHECK-NEXT: ori 11, 11, 57976 -; CHECK-NEXT: ori 12, 12, 33144 -; CHECK-NEXT: ori 0, 0, 19576 -; CHECK-NEXT: ori 30, 30, 6008 -; CHECK-NEXT: ori 29, 29, 46712 -; CHECK-NEXT: ori 28, 28, 21880 -; CHECK-NEXT: ori 27, 27, 8312 -; CHECK-NEXT: ori 26, 26, 60280 -; CHECK-NEXT: ori 25, 25, 35448 -; CHECK-NEXT: add 7, 4, 7 -; CHECK-NEXT: sldi 5, 5, 32 -; CHECK-NEXT: oris 5, 5, 29347 -; CHECK-NEXT: ori 5, 5, 20088 -; CHECK-NEXT: std 7, 384(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 8 -; CHECK-NEXT: lis 8, 402 -; CHECK-NEXT: std 7, 376(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 9 -; CHECK-NEXT: lis 9, 451 -; CHECK-NEXT: std 7, 368(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 10 -; CHECK-NEXT: lis 10, 500 -; CHECK-NEXT: std 7, 360(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 11 -; CHECK-NEXT: lis 11, 549 -; CHECK-NEXT: std 7, 352(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 12 -; CHECK-NEXT: std 7, 344(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 0 -; CHECK-NEXT: std 7, 336(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 30 -; CHECK-NEXT: std 7, 328(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 29 -; CHECK-NEXT: std 7, 320(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 28 -; CHECK-NEXT: std 7, 312(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 27 -; CHECK-NEXT: std 7, 304(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 26 -; CHECK-NEXT: std 7, 296(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 25 -; CHECK-NEXT: std 7, 
288(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 7, 6, 13213 -; CHECK-NEXT: ori 7, 7, 10616 -; CHECK-NEXT: add 7, 4, 7 -; CHECK-NEXT: std 7, 280(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 7, 6, 13200 -; CHECK-NEXT: oris 6, 6, 13188 -; CHECK-NEXT: ori 7, 7, 62584 -; CHECK-NEXT: ori 6, 6, 49016 -; CHECK-NEXT: add 7, 4, 7 +; CHECK-NEXT: lis 0, 4 +; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 5, 0, 6292 +; CHECK-NEXT: ori 0, 0, 6291 +; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill +; CHECK-NEXT: li 22, 0 +; CHECK-NEXT: std 20, -96(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, -88(1) # 8-byte Folded Spill +; CHECK-NEXT: sldi 23, 5, 32 +; CHECK-NEXT: oris 5, 23, 13030 +; CHECK-NEXT: sldi 0, 0, 32 +; CHECK-NEXT: oris 6, 23, 13066 +; CHECK-NEXT: oris 7, 23, 13054 +; CHECK-NEXT: oris 8, 23, 13042 +; CHECK-NEXT: oris 9, 23, 13078 +; CHECK-NEXT: oris 10, 23, 13115 +; CHECK-NEXT: oris 11, 23, 13103 +; CHECK-NEXT: oris 12, 23, 13091 +; CHECK-NEXT: oris 30, 23, 13127 +; CHECK-NEXT: oris 29, 23, 13164 +; CHECK-NEXT: oris 28, 23, 13152 +; CHECK-NEXT: oris 27, 23, 13139 +; CHECK-NEXT: oris 26, 23, 13176 +; CHECK-NEXT: oris 25, 23, 13213 +; CHECK-NEXT: oris 24, 23, 13200 +; CHECK-NEXT: oris 23, 23, 13188 +; CHECK-NEXT: oris 0, 0, 29347 +; CHECK-NEXT: ori 5, 5, 3704 +; CHECK-NEXT: ori 6, 6, 44408 +; CHECK-NEXT: ori 7, 7, 30840 +; CHECK-NEXT: ori 8, 8, 17272 +; CHECK-NEXT: ori 9, 9, 57976 +; CHECK-NEXT: ori 10, 10, 33144 +; CHECK-NEXT: ori 11, 11, 19576 +; CHECK-NEXT: ori 12, 12, 6008 +; CHECK-NEXT: ori 30, 30, 46712 +; CHECK-NEXT: ori 29, 29, 21880 +; CHECK-NEXT: ori 28, 28, 8312 +; CHECK-NEXT: 
ori 27, 27, 60280 +; CHECK-NEXT: ori 26, 26, 35448 +; CHECK-NEXT: ori 25, 25, 10616 +; CHECK-NEXT: ori 24, 24, 62584 +; CHECK-NEXT: ori 23, 23, 49016 +; CHECK-NEXT: ori 0, 0, 20088 +; CHECK-NEXT: add 5, 4, 5 ; CHECK-NEXT: add 6, 4, 6 -; CHECK-NEXT: add 4, 4, 5 -; CHECK-NEXT: lis 5, 268 -; CHECK-NEXT: std 4, 256(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 585 -; CHECK-NEXT: std 6, 264(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 6, 305 -; CHECK-NEXT: std 7, 272(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 7, 354 -; CHECK-NEXT: ori 4, 4, 61440 -; CHECK-NEXT: std 4, 560(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 48 -; CHECK-NEXT: ori 4, 4, 54272 -; CHECK-NEXT: std 4, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 97 -; CHECK-NEXT: ori 4, 4, 43008 -; CHECK-NEXT: std 4, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 146 -; CHECK-NEXT: ori 4, 4, 31744 -; CHECK-NEXT: std 4, 536(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 195 -; CHECK-NEXT: ori 4, 4, 20480 -; CHECK-NEXT: std 4, 528(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 244 -; CHECK-NEXT: ori 4, 4, 9216 -; CHECK-NEXT: std 4, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 292 -; CHECK-NEXT: ori 4, 4, 63488 -; CHECK-NEXT: std 4, 512(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 341 -; CHECK-NEXT: ori 4, 4, 52224 -; CHECK-NEXT: std 4, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 390 -; CHECK-NEXT: ori 4, 4, 40960 -; CHECK-NEXT: std 4, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 439 -; CHECK-NEXT: ori 4, 4, 29696 -; CHECK-NEXT: std 4, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 488 -; CHECK-NEXT: ori 4, 4, 18432 -; CHECK-NEXT: std 4, 480(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 537 -; CHECK-NEXT: ori 4, 4, 7168 -; CHECK-NEXT: std 4, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 36 -; CHECK-NEXT: ori 4, 4, 40704 -; CHECK-NEXT: std 4, 464(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 85 -; CHECK-NEXT: ori 4, 4, 29440 -; CHECK-NEXT: std 4, 456(1) # 8-byte Folded Spill 
-; CHECK-NEXT: lis 4, 134 -; CHECK-NEXT: ori 4, 4, 18176 -; CHECK-NEXT: std 4, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 183 -; CHECK-NEXT: ori 4, 4, 6912 -; CHECK-NEXT: std 4, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 231 -; CHECK-NEXT: ori 4, 4, 61184 -; CHECK-NEXT: std 4, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 280 -; CHECK-NEXT: ori 4, 4, 49920 -; CHECK-NEXT: std 4, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 329 -; CHECK-NEXT: ori 4, 4, 38656 -; CHECK-NEXT: std 4, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 378 -; CHECK-NEXT: ori 4, 4, 27392 -; CHECK-NEXT: std 4, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 427 -; CHECK-NEXT: ori 4, 4, 16128 -; CHECK-NEXT: std 4, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 476 -; CHECK-NEXT: ori 4, 4, 4864 -; CHECK-NEXT: std 4, 248(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 524 -; CHECK-NEXT: ori 4, 4, 59136 -; CHECK-NEXT: std 4, 240(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 573 -; CHECK-NEXT: ori 4, 4, 47872 -; CHECK-NEXT: std 4, 232(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 24 -; CHECK-NEXT: ori 4, 4, 27136 -; CHECK-NEXT: std 4, 224(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 73 -; CHECK-NEXT: ori 4, 4, 15872 -; CHECK-NEXT: std 4, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 122 -; CHECK-NEXT: ori 4, 4, 4608 -; CHECK-NEXT: std 4, 208(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 170 -; CHECK-NEXT: ori 4, 4, 58880 -; CHECK-NEXT: std 4, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 219 -; CHECK-NEXT: ori 4, 4, 47616 -; CHECK-NEXT: std 4, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 36352 -; CHECK-NEXT: lis 5, 317 -; CHECK-NEXT: ld 30, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 25088 -; CHECK-NEXT: lis 5, 366 -; CHECK-NEXT: ld 29, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 13824 -; CHECK-NEXT: lis 5, 415 -; 
CHECK-NEXT: ld 28, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 2560 -; CHECK-NEXT: lis 5, 463 -; CHECK-NEXT: ld 27, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 56832 -; CHECK-NEXT: lis 5, 512 -; CHECK-NEXT: ld 26, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 152(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 45568 -; CHECK-NEXT: lis 5, 561 -; CHECK-NEXT: ld 25, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 34304 -; CHECK-NEXT: lis 5, 12 -; CHECK-NEXT: ld 24, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 13568 -; CHECK-NEXT: lis 5, 61 -; CHECK-NEXT: ld 23, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 2304 -; CHECK-NEXT: lis 5, 109 -; CHECK-NEXT: std 4, 120(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 56576 -; CHECK-NEXT: lis 5, 158 -; CHECK-NEXT: ld 0, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 45312 -; CHECK-NEXT: lis 5, 207 -; CHECK-NEXT: ld 22, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 34048 -; CHECK-NEXT: lis 5, 256 -; CHECK-NEXT: ld 21, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 22784 -; CHECK-NEXT: ld 5, 248(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 6, 11520 -; CHECK-NEXT: ld 6, 240(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 80(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 7, 256 -; CHECK-NEXT: ld 7, 232(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: std 
4, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 8, 54528 -; CHECK-NEXT: ld 8, 224(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 9, 43264 -; CHECK-NEXT: ld 9, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 10, 32000 -; CHECK-NEXT: ld 10, 208(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 56(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 11, 20736 -; CHECK-NEXT: ld 11, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: ld 31, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: add 7, 4, 7 +; CHECK-NEXT: add 8, 4, 8 +; CHECK-NEXT: add 9, 4, 9 +; CHECK-NEXT: add 10, 4, 10 +; CHECK-NEXT: add 11, 4, 11 +; CHECK-NEXT: add 12, 4, 12 +; CHECK-NEXT: add 30, 4, 30 +; CHECK-NEXT: add 29, 4, 29 +; CHECK-NEXT: add 28, 4, 28 +; CHECK-NEXT: add 27, 4, 27 +; CHECK-NEXT: add 26, 4, 26 +; CHECK-NEXT: add 25, 4, 25 +; CHECK-NEXT: add 24, 4, 24 +; CHECK-NEXT: add 23, 4, 23 +; CHECK-NEXT: add 4, 4, 0 +; CHECK-NEXT: li 0, 83 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_2 Depth 2 -; CHECK-NEXT: stw 4, 396(1) # 4-byte Folded Spill -; CHECK-NEXT: li 4, 83 -; CHECK-NEXT: mtctr 4 -; CHECK-NEXT: ld 12, 256(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 4, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: mtctr 0 +; CHECK-NEXT: mr 21, 4 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ld 2, 560(1) # 8-byte Folded Reload -; CHECK-NEXT: stdux 3, 12, 2 -; CHECK-NEXT: ld 2, 552(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 5 -; CHECK-NEXT: stdx 3, 12, 6 -; CHECK-NEXT: stdx 3, 
12, 7 -; CHECK-NEXT: stdx 3, 12, 8 -; CHECK-NEXT: stdx 3, 12, 9 -; CHECK-NEXT: stdx 3, 12, 10 -; CHECK-NEXT: stdx 3, 12, 11 -; CHECK-NEXT: stdx 3, 12, 30 -; CHECK-NEXT: stdx 3, 12, 29 -; CHECK-NEXT: stdx 3, 12, 28 -; CHECK-NEXT: stdx 3, 12, 27 -; CHECK-NEXT: stdx 3, 12, 26 -; CHECK-NEXT: stdx 3, 12, 25 -; CHECK-NEXT: stdx 3, 12, 24 -; CHECK-NEXT: stdx 3, 12, 23 -; CHECK-NEXT: stdx 3, 12, 4 -; CHECK-NEXT: stdx 3, 12, 0 -; CHECK-NEXT: stdx 3, 12, 22 -; CHECK-NEXT: stdx 3, 12, 21 -; CHECK-NEXT: stdx 3, 12, 20 -; CHECK-NEXT: stdx 3, 12, 19 -; CHECK-NEXT: stdx 3, 12, 18 -; CHECK-NEXT: stdx 3, 12, 17 -; CHECK-NEXT: stdx 3, 12, 16 -; CHECK-NEXT: stdx 3, 12, 15 -; CHECK-NEXT: stdx 3, 12, 14 -; CHECK-NEXT: stdx 3, 12, 31 -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 544(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 536(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 528(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 520(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 512(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 496(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 488(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 480(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 472(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 464(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 424(1) # 8-byte Folded 
Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 408(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 +; CHECK-NEXT: lis 20, 585 +; CHECK-NEXT: ori 20, 20, 61440 +; CHECK-NEXT: stdux 3, 21, 20 +; CHECK-NEXT: lis 20, 48 +; CHECK-NEXT: ori 20, 20, 54272 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 97 +; CHECK-NEXT: ori 20, 20, 43008 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 146 +; CHECK-NEXT: ori 20, 20, 31744 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 195 +; CHECK-NEXT: ori 20, 20, 20480 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 244 +; CHECK-NEXT: ori 20, 20, 9216 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 292 +; CHECK-NEXT: ori 20, 20, 63488 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 341 +; CHECK-NEXT: ori 20, 20, 52224 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 390 +; CHECK-NEXT: ori 20, 20, 40960 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 439 +; CHECK-NEXT: ori 20, 20, 29696 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 488 +; CHECK-NEXT: ori 20, 20, 18432 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 537 +; CHECK-NEXT: ori 20, 20, 7168 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 36 +; CHECK-NEXT: ori 20, 20, 40704 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 85 +; CHECK-NEXT: ori 20, 20, 29440 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 134 +; CHECK-NEXT: ori 20, 20, 18176 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 183 +; CHECK-NEXT: ori 20, 20, 6912 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 231 +; CHECK-NEXT: ori 20, 20, 61184 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 280 +; CHECK-NEXT: ori 20, 20, 49920 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 329 +; CHECK-NEXT: ori 20, 20, 38656 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 
20, 378 +; CHECK-NEXT: ori 20, 20, 27392 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 427 +; CHECK-NEXT: ori 20, 20, 16128 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 476 +; CHECK-NEXT: ori 20, 20, 4864 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 524 +; CHECK-NEXT: ori 20, 20, 59136 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 573 +; CHECK-NEXT: ori 20, 20, 47872 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 24 +; CHECK-NEXT: ori 20, 20, 27136 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 73 +; CHECK-NEXT: ori 20, 20, 15872 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 122 +; CHECK-NEXT: ori 20, 20, 4608 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 170 +; CHECK-NEXT: ori 20, 20, 58880 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 219 +; CHECK-NEXT: ori 20, 20, 47616 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 268 +; CHECK-NEXT: ori 20, 20, 36352 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 317 +; CHECK-NEXT: ori 20, 20, 25088 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 366 +; CHECK-NEXT: ori 20, 20, 13824 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 415 +; CHECK-NEXT: ori 20, 20, 2560 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 463 +; CHECK-NEXT: ori 20, 20, 56832 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 512 +; CHECK-NEXT: ori 20, 20, 45568 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 561 +; CHECK-NEXT: ori 20, 20, 34304 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 12 +; CHECK-NEXT: ori 20, 20, 13568 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 61 +; CHECK-NEXT: ori 20, 20, 2304 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 109 +; CHECK-NEXT: ori 20, 20, 56576 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 158 +; CHECK-NEXT: ori 20, 20, 45312 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 207 +; CHECK-NEXT: ori 20, 20, 34048 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 256 +; CHECK-NEXT: ori 
20, 20, 22784 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 305 +; CHECK-NEXT: ori 20, 20, 11520 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 354 +; CHECK-NEXT: ori 20, 20, 256 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 402 +; CHECK-NEXT: ori 20, 20, 54528 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 451 +; CHECK-NEXT: ori 20, 20, 43264 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 500 +; CHECK-NEXT: ori 20, 20, 32000 +; CHECK-NEXT: stdx 3, 21, 20 +; CHECK-NEXT: lis 20, 549 +; CHECK-NEXT: ori 20, 20, 20736 +; CHECK-NEXT: stdx 3, 21, 20 ; CHECK-NEXT: bdnz .LBB0_2 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: ld 12, 384(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 4, 396(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 4, 4, 1 -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 376(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 368(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 360(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 352(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 344(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 336(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 328(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 320(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 312(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 304(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 296(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 288(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 280(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 272(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: xoris 12, 4, 6 -; CHECK-NEXT: cmplwi 12, 6784 -; CHECK-NEXT: ld 12, 
264(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 22, 22, 1 +; CHECK-NEXT: std 3, 0(5) +; CHECK-NEXT: std 3, 0(6) +; CHECK-NEXT: xoris 21, 22, 6 +; CHECK-NEXT: std 3, 0(7) +; CHECK-NEXT: std 3, 0(8) +; CHECK-NEXT: std 3, 0(9) +; CHECK-NEXT: std 3, 0(10) +; CHECK-NEXT: cmplwi 21, 6784 +; CHECK-NEXT: std 3, 0(11) ; CHECK-NEXT: std 3, 0(12) +; CHECK-NEXT: std 3, 0(30) +; CHECK-NEXT: std 3, 0(29) +; CHECK-NEXT: std 3, 0(28) +; CHECK-NEXT: std 3, 0(27) +; CHECK-NEXT: std 3, 0(26) +; CHECK-NEXT: std 3, 0(25) +; CHECK-NEXT: std 3, 0(24) +; CHECK-NEXT: std 3, 0(23) ; CHECK-NEXT: bne 0, .LBB0_1 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: ld 2, 568(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 712(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 704(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 696(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: ld 28, 688(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 680(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 672(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 664(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 656(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 648(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 640(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 632(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 624(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 616(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 608(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 600(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 592(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 584(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 576(1) # 8-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 720 +; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, -64(1) # 
8-byte Folded Reload +; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, -88(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, -96(1) # 8-byte Folded Reload ; CHECK-NEXT: blr %3 = getelementptr inbounds i64, i64* %1, i64 144115188075855 %4 = getelementptr i64, i64* %1, i64 144115586875855 diff --git a/llvm/test/CodeGen/PowerPC/stdux-constuse.ll b/llvm/test/CodeGen/PowerPC/stdux-constuse.ll --- a/llvm/test/CodeGen/PowerPC/stdux-constuse.ll +++ b/llvm/test/CodeGen/PowerPC/stdux-constuse.ll @@ -31,10 +31,10 @@ ; CHECK: @test1 ; CHECK: mtctr ; CHECK: stdux -; CHECK-NEXT: stdx -; CHECK-NEXT: stdx -; CHECK-NEXT: stdx -; CHECK-NEXT: bdnz +; CHECK: stdx +; CHECK: stdx +; CHECK: stdx +; CHECK: bdnz for.end: %inc9 = add nsw i32 %nl.018, 1 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -441,25 +441,18 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: ldrd r9, r12, [sp, #128] +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: ldrd r9, r12, [sp, #88] ; CHECK-NEXT: sub.w r7, r12, #1 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: add.w r7, r6, r7, lsr #1 -; CHECK-NEXT: vdup.32 q1, r9 +; CHECK-NEXT: vdup.32 q0, r9 ; CHECK-NEXT: bic r7, r7, #3 -; CHECK-NEXT: vshl.i32 q3, q1, #3 +; CHECK-NEXT: vshl.i32 q1, q0, #3 ; CHECK-NEXT: subs r7, #4 ; CHECK-NEXT: add.w r10, r6, r7, lsr #2 -; CHECK-NEXT: adr r7, .LCPI9_0 -; CHECK-NEXT: adr r6, .LCPI9_1 -; CHECK-NEXT: vldrw.u32 
q2, [r7] -; CHECK-NEXT: vldrw.u32 q0, [r6] -; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: .LBB9_1: @ %for.cond8.preheader.us.us.preheader ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB9_2 Depth 2 @@ -471,33 +464,36 @@ ; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1 ; CHECK-NEXT: @ => This Loop Header: Depth=2 ; CHECK-NEXT: @ Child Loop BB9_3 Depth 3 -; CHECK-NEXT: vdup.32 q5, r7 -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vshl.i32 q5, q5, #2 -; CHECK-NEXT: vmov q6, q1 -; CHECK-NEXT: vadd.i32 q5, q5, r0 +; CHECK-NEXT: vdup.32 q3, r7 +; CHECK-NEXT: adr r6, .LCPI9_1 +; CHECK-NEXT: vshl.i32 q3, q3, #2 +; CHECK-NEXT: adr r4, .LCPI9_0 +; CHECK-NEXT: vldrw.u32 q4, [r6] +; CHECK-NEXT: vadd.i32 q3, q3, r0 +; CHECK-NEXT: vldrw.u32 q5, [r4] ; CHECK-NEXT: dls lr, r10 -; CHECK-NEXT: vmov.i32 q4, #0x0 -; CHECK-NEXT: vadd.i32 q5, q5, q0 -; CHECK-NEXT: vmlas.u32 q6, q2, r5 +; CHECK-NEXT: vadd.i32 q3, q3, q4 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vmlas.u32 q4, q5, r5 ; CHECK-NEXT: .LBB9_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB9_2 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 -; CHECK-NEXT: vadd.i32 q7, q6, q3 -; CHECK-NEXT: vldrw.u32 q0, [r1, q6, uxtw #2] -; CHECK-NEXT: vldrw.u32 q6, [q5, #32]! -; CHECK-NEXT: vmul.i32 q0, q0, q6 -; CHECK-NEXT: vmov q6, q7 -; CHECK-NEXT: vadd.i32 q4, q0, q4 +; CHECK-NEXT: vadd.i32 q5, q4, q1 +; CHECK-NEXT: vldrw.u32 q6, [r1, q4, uxtw #2] +; CHECK-NEXT: vldrw.u32 q4, [q3, #32]! 
+; CHECK-NEXT: vmul.i32 q4, q6, q4 +; CHECK-NEXT: vadd.i32 q2, q4, q2 +; CHECK-NEXT: vmov q4, q5 ; CHECK-NEXT: le lr, .LBB9_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block ; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=2 -; CHECK-NEXT: add.w r4, r5, r11 +; CHECK-NEXT: add.w r6, r5, r11 ; CHECK-NEXT: adds r5, #1 -; CHECK-NEXT: vaddv.u32 r6, q4 +; CHECK-NEXT: vaddv.u32 r4, q2 ; CHECK-NEXT: cmp r5, r9 -; CHECK-NEXT: str.w r6, [r2, r4, lsl #2] +; CHECK-NEXT: str.w r4, [r2, r6, lsl #2] ; CHECK-NEXT: bne .LBB9_2 ; CHECK-NEXT: @ %bb.5: @ %for.cond4.for.cond.cleanup6_crit_edge.us ; CHECK-NEXT: @ in Loop: Header=BB9_1 Depth=1 @@ -505,8 +501,7 @@ ; CHECK-NEXT: cmp r8, r3 ; CHECK-NEXT: bne .LBB9_1 ; CHECK-NEXT: @ %bb.6: @ %for.end25 -; CHECK-NEXT: add sp, #24 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 @@ -592,8 +587,8 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill @@ -601,16 +596,16 @@ ; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r3 ; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrne r0, [sp, #112] +; CHECK-NEXT: ldrne r0, [sp, #96] ; CHECK-NEXT: cmpne r0, #0 ; CHECK-NEXT: bne .LBB10_2 ; CHECK-NEXT: .LBB10_1: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #24 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .LBB10_2: @ %for.cond1.preheader.us.preheader -; CHECK-NEXT: ldr.w r9, [sp, #116] +; CHECK-NEXT: ldr.w r9, [sp, #100] ; CHECK-NEXT: 
mov r6, r1 ; CHECK-NEXT: movs r1, #1 ; CHECK-NEXT: mov r11, r2 @@ -618,17 +613,15 @@ ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: sub.w r0, r10, #4 ; CHECK-NEXT: add.w r0, r1, r0, lsr #2 -; CHECK-NEXT: ldr r1, [sp, #112] ; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: lsl.w r0, r9, #1 -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: adr r0, .LCPI10_0 -; CHECK-NEXT: vdup.32 q4, r1 -; CHECK-NEXT: vldrw.u32 q5, [r0] -; CHECK-NEXT: lsls r4, r1, #1 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vshl.i32 q6, q4, #2 +; CHECK-NEXT: ldr r0, [sp, #96] +; CHECK-NEXT: lsl.w r1, r9, #1 +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill ; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vdup.32 q4, r0 +; CHECK-NEXT: lsls r4, r0, #1 +; CHECK-NEXT: vshl.i32 q5, q4, #2 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: b .LBB10_5 ; CHECK-NEXT: .LBB10_3: @ %for.cond5.preheader.us73.preheader @@ -653,7 +646,7 @@ ; CHECK-NEXT: @ Child Loop BB10_8 Depth 2 ; CHECK-NEXT: @ Child Loop BB10_11 Depth 3 ; CHECK-NEXT: @ Child Loop BB10_14 Depth 3 -; CHECK-NEXT: ldr r0, [sp, #112] +; CHECK-NEXT: ldr r0, [sp, #96] ; CHECK-NEXT: cmp.w r9, #0 ; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: mul r12, r1, r0 @@ -664,7 +657,7 @@ ; CHECK-NEXT: b .LBB10_8 ; CHECK-NEXT: .LBB10_7: @ %for.cond5.for.cond.cleanup7_crit_edge.us.us ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 -; CHECK-NEXT: ldr r0, [sp, #112] +; CHECK-NEXT: ldr r0, [sp, #96] ; CHECK-NEXT: add.w r3, r1, r12 ; CHECK-NEXT: adds r1, #1 ; CHECK-NEXT: cmp r1, r0 @@ -684,16 +677,18 @@ ; CHECK-NEXT: .LBB10_10: @ %vector.ph ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adr r2, .LCPI10_0 +; CHECK-NEXT: vldrw.u32 q2, [r2] ; CHECK-NEXT: vmov q1, q4 -; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vmlas.u32 q1, q5, r1 ; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: ldr r2, [sp, 
#16] @ 4-byte Reload +; CHECK-NEXT: vmlas.u32 q1, q2, r1 ; CHECK-NEXT: .LBB10_11: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 ; CHECK-NEXT: @ Parent Loop BB10_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 -; CHECK-NEXT: vadd.i32 q2, q1, q6 +; CHECK-NEXT: vadd.i32 q2, q1, q5 ; CHECK-NEXT: vldrh.s32 q3, [r6, q1, uxtw #1] ; CHECK-NEXT: vldrh.s32 q1, [r2], #8 ; CHECK-NEXT: vmul.i32 q1, q3, q1 @@ -708,7 +703,7 @@ ; CHECK-NEXT: beq .LBB10_7 ; CHECK-NEXT: .LBB10_13: @ %for.body8.us.us.preheader ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 -; CHECK-NEXT: ldr r0, [sp, #112] +; CHECK-NEXT: ldr r0, [sp, #96] ; CHECK-NEXT: add.w r5, r8, r7 ; CHECK-NEXT: sub.w lr, r9, r7 ; CHECK-NEXT: mla r3, r0, r7, r1 @@ -848,57 +843,55 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldrd r2, r7, [sp, #104] -; CHECK-NEXT: add.w r8, r7, #10 -; CHECK-NEXT: adr r7, .LCPI11_0 -; CHECK-NEXT: ldr r1, [sp, #96] -; CHECK-NEXT: vdup.32 q1, r2 -; CHECK-NEXT: vldrw.u32 q0, [r7] -; CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: mov.w r9, #6 -; CHECK-NEXT: movs r6, #11 -; CHECK-NEXT: vshl.i32 q1, q1, #2 -; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: ldrd r2, r7, [sp, #96] +; CHECK-NEXT: add.w r10, r7, #10 +; CHECK-NEXT: ldr.w r12, [sp, #140] +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: ldr r1, [sp, #88] +; CHECK-NEXT: vdup.32 q0, r2 +; CHECK-NEXT: vshl.i32 q0, q0, #2 +; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: .LBB11_1: @ %for.body10.i ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB11_2 Depth 2 ; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 ; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: adr r7, .LCPI11_0 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: vldrw.u32 q1, [r7] ; CHECK-NEXT: .LBB11_2: @ 
%for.cond22.preheader.i ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 ; CHECK-NEXT: @ => This Loop Header: Depth=2 ; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 ; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: .LBB11_3: @ %for.body27.i ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 ; CHECK-NEXT: @ => This Loop Header: Depth=3 ; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: dls lr, r9 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: mov.w r11, #4 +; CHECK-NEXT: mov.w lr, #6 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: movs r5, #4 ; CHECK-NEXT: .LBB11_4: @ %for.body78.us.i ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 ; CHECK-NEXT: @ Parent Loop BB11_3 Depth=3 ; CHECK-NEXT: @ => This Loop Header: Depth=4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: mul r4, r11, r6 -; CHECK-NEXT: vdup.32 q3, r5 -; CHECK-NEXT: vdup.32 q2, r7 -; CHECK-NEXT: vadd.i32 q4, q0, r4 +; CHECK-NEXT: movs r6, #11 +; CHECK-NEXT: vdup.32 q3, r7 +; CHECK-NEXT: muls r6, r5, r6 +; CHECK-NEXT: vdup.32 q2, r11 +; CHECK-NEXT: vadd.i32 q4, q1, r6 ; CHECK-NEXT: vmla.u32 q3, q4, r2 -; CHECK-NEXT: adds r4, #113 -; CHECK-NEXT: vadd.i32 q4, q0, r4 -; CHECK-NEXT: mov r4, r8 +; CHECK-NEXT: adds r6, #113 +; CHECK-NEXT: vadd.i32 q4, q1, r6 +; CHECK-NEXT: mov r6, r10 ; CHECK-NEXT: vmla.u32 q2, q4, r2 ; CHECK-NEXT: .LBB11_5: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 @@ -907,38 +900,36 @@ ; CHECK-NEXT: @ Parent Loop BB11_4 Depth=4 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=5 ; CHECK-NEXT: vldrb.s32 q6, [r0, q2] -; CHECK-NEXT: vadd.i32 q5, q2, q1 -; CHECK-NEXT: vadd.i32 q4, q3, q1 -; CHECK-NEXT: subs r4, #4 +; CHECK-NEXT: vadd.i32 q5, q2, q0 +; CHECK-NEXT: vadd.i32 q4, q3, q0 +; CHECK-NEXT: subs r6, #4 ; CHECK-NEXT: vadd.i32 q2, q6, r2 ; 
CHECK-NEXT: vldrb.s32 q6, [r1, q3] ; CHECK-NEXT: vmov q3, q4 -; CHECK-NEXT: vmlava.u32 r12, q2, q6 +; CHECK-NEXT: vmlava.u32 r4, q2, q6 ; CHECK-NEXT: vmov q2, q5 ; CHECK-NEXT: bne .LBB11_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: @ in Loop: Header=BB11_4 Depth=4 -; CHECK-NEXT: add.w r11, r11, #1 +; CHECK-NEXT: adds r5, #1 ; CHECK-NEXT: le lr, .LBB11_4 ; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup77.i ; CHECK-NEXT: @ in Loop: Header=BB11_3 Depth=3 -; CHECK-NEXT: adds r5, #1 -; CHECK-NEXT: add.w r10, r10, #1 -; CHECK-NEXT: cmp r5, r2 +; CHECK-NEXT: adds r7, #1 +; CHECK-NEXT: add.w r9, r9, #1 +; CHECK-NEXT: cmp r7, r2 ; CHECK-NEXT: bne .LBB11_3 ; CHECK-NEXT: @ %bb.8: @ %for.cond.cleanup26.i ; CHECK-NEXT: @ in Loop: Header=BB11_2 Depth=2 -; CHECK-NEXT: adds r7, #1 -; CHECK-NEXT: cmp r7, r3 +; CHECK-NEXT: add.w r11, r11, #1 +; CHECK-NEXT: cmp r11, r3 ; CHECK-NEXT: bne .LBB11_2 ; CHECK-NEXT: @ %bb.9: @ %for.cond.cleanup20.i ; CHECK-NEXT: @ in Loop: Header=BB11_1 Depth=1 -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: ldr r7, [sp, #148] -; CHECK-NEXT: adds r5, #1 -; CHECK-NEXT: cmp r5, r7 +; CHECK-NEXT: add.w r8, r8, #1 +; CHECK-NEXT: cmp r8, r12 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq r5, #0 +; CHECK-NEXT: moveq.w r8, #0 ; CHECK-NEXT: b .LBB11_1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.10: diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -551,12 +551,13 @@ ; Test an interesting case using nested loops and switches. 
; CHECK-LABEL: test8: -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: end_block ; CHECK-NEXT: loop i32{{$}} ; CHECK-NEXT: i32.const $push{{[^,]+}}, 0{{$}} ; CHECK-NEXT: br_if 0, {{[^,]+}}{{$}} -; CHECK-NEXT: br 0{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: br 1{{$}} +; CHECK-NEXT: .LBB{{[0-9]+}}_4: ; CHECK-NEXT: end_loop{{$}} define i32 @test8() { bb: diff --git a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll --- a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll +++ b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll @@ -471,8 +471,8 @@ ; CHECK-LABEL: multiple_defs: ; CHECK: f64.add $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: local.tee $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}} -; CHECK-NEXT: f64.select $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}} +; CHECK: local.tee $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}} +; CHECK: f64.select $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], $pop{{[0-9]+}} ; CHECK: $[[NUM2]]=, ; NOREGS-LABEL: multiple_defs: ; NOREGS: f64.add diff --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll --- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll +++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll @@ -15,12 +15,12 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: movslq (%rdi), %rdi -; CHECK-NEXT: movslq (%rsi), %r8 -; CHECK-NEXT: movslq (%rdx), %r10 +; CHECK-NEXT: movslq (%rsi), %r10 +; CHECK-NEXT: movslq (%rdx), %r8 ; CHECK-NEXT: movl (%rcx), %esi ; CHECK-NEXT: movq %rsp, %rcx -; CHECK-NEXT: subl %edi, %r8d -; CHECK-NEXT: movslq %r8d, %rdx +; CHECK-NEXT: subl %edi, %r10d +; CHECK-NEXT: movslq %r10d, %rdx ; CHECK-NEXT: js .LBB0_1 ; CHECK-NEXT: # %bb.11: # %b63 ; CHECK-NEXT: testq %rdx, %rdx @@ -44,18 +44,17 @@ ; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: je .LBB0_16 ; CHECK-NEXT: .LBB0_1: 
# %a29b -; CHECK-NEXT: cmpl %r10d, %esi +; CHECK-NEXT: cmpl %r8d, %esi ; CHECK-NEXT: js .LBB0_10 ; CHECK-NEXT: # %bb.2: # %b158 ; CHECK-NEXT: movslq (%r9), %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movb $1, %r9b +; CHECK-NEXT: xorl %r9d, %r9d +; CHECK-NEXT: movb $1, %r8b ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_9: # %b1606 ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: testb %r9b, %r9b ; CHECK-NEXT: je .LBB0_10 ; CHECK-NEXT: .LBB0_3: # %a29b173 ; CHECK-NEXT: # =>This Loop Header: Depth=1 @@ -73,7 +72,7 @@ ; CHECK-NEXT: # Child Loop BB0_33 Depth 3 ; CHECK-NEXT: # Child Loop BB0_34 Depth 2 ; CHECK-NEXT: # Child Loop BB0_36 Depth 2 -; CHECK-NEXT: testl %r8d, %r8d +; CHECK-NEXT: testl %r10d, %r10d ; CHECK-NEXT: js .LBB0_4 ; CHECK-NEXT: # %bb.17: # %b179 ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 @@ -83,70 +82,76 @@ ; CHECK-NEXT: .LBB0_37: # %a30b ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_37 ; CHECK-NEXT: .LBB0_18: # %b188 ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: testb %r9b, %r9b +; CHECK-NEXT: testb %r8b, %r8b ; CHECK-NEXT: jne .LBB0_4 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_19: # %a30b294 ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_19 ; CHECK-NEXT: .LBB0_4: # %a33b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: movl %esi, %r10d -; CHECK-NEXT: orl %r8d, %r10d +; CHECK-NEXT: movl %esi, %r11d +; CHECK-NEXT: orl %r10d, %r11d ; CHECK-NEXT: jns .LBB0_20 ; CHECK-NEXT: .LBB0_5: # %a50b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: shrl $31, %r10d -; CHECK-NEXT: movl 
%r8d, %eax -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: shrl $31, %r11d +; CHECK-NEXT: movl %r10d, %edi +; CHECK-NEXT: orl %esi, %edi ; CHECK-NEXT: jns .LBB0_26 ; CHECK-NEXT: .LBB0_6: # %a57b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: shrl $31, %eax -; CHECK-NEXT: testb %r10b, %r10b +; CHECK-NEXT: shrl $31, %edi +; CHECK-NEXT: testb %r11b, %r11b ; CHECK-NEXT: je .LBB0_30 ; CHECK-NEXT: .LBB0_7: # %a66b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: jne .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_34: # %a74b ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_34 ; CHECK-NEXT: # %bb.35: # %b1582 ; CHECK-NEXT: # in Loop: Header=BB0_34 Depth=2 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_34 ; CHECK-NEXT: .LBB0_8: # %a93b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: testl %r8d, %r8d +; CHECK-NEXT: testl %r10d, %r10d ; CHECK-NEXT: js .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_36: # %a97b ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: addss %xmm0, %xmm1 ; CHECK-NEXT: addss %xmm0, %xmm1 -; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: addss %xmm0, %xmm2 -; CHECK-NEXT: addss %xmm1, %xmm2 -; CHECK-NEXT: movss %xmm2, {{.*}}(%rip) -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: addss (%rcx), %xmm0 +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: movss %xmm0, {{.*}}(%rip) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_36 ; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_22: # 
%b463 ; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_23 ; CHECK-NEXT: .LBB0_20: # %b341 ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 @@ -159,7 +164,8 @@ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # Parent Loop BB0_20 Depth=2 ; CHECK-NEXT: # => This Inner Loop Header: Depth=3 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_21 ; CHECK-NEXT: jmp .LBB0_22 ; CHECK-NEXT: .p2align 4, 0x90 @@ -179,11 +185,13 @@ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # Parent Loop BB0_26 Depth=2 ; CHECK-NEXT: # => This Inner Loop Header: Depth=3 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_38 ; CHECK-NEXT: .LBB0_27: # %b879 ; CHECK-NEXT: # in Loop: Header=BB0_26 Depth=2 -; CHECK-NEXT: testb %r9b, %r9b +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_28 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_29: # %a53b1019 @@ -227,7 +235,8 @@ ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_25: # %b712 ; CHECK-NEXT: # in Loop: Header=BB0_23 Depth=2 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: .LBB0_23: # %b535 ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 @@ -240,7 +249,8 @@ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # Parent Loop BB0_23 Depth=2 ; CHECK-NEXT: # => This Inner Loop Header: Depth=3 -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_24 ; CHECK-NEXT: jmp .LBB0_25 ; CHECK-NEXT: .LBB0_10: # %a109b diff --git a/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll --- a/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll 
@@ -17,7 +17,7 @@ ; CHECK: %bb30.loopexit ; CHECK: divsd %xmm0 -; CHECK: movsd %xmm0, 16(%esp) +; CHECK: movsd %xmm0, 8(%esp) ; CHECK: %bb3 bb3: ; preds = %bb30.loopexit, %bb25, %bb3 %2 = load i32, i32* null, align 4 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/known-bits.ll b/llvm/test/CodeGen/X86/known-bits.ll --- a/llvm/test/CodeGen/X86/known-bits.ll +++ b/llvm/test/CodeGen/X86/known-bits.ll @@ -5,7 +5,6 @@ define void @knownbits_zext_in_reg(i8*) nounwind { ; X32-LABEL: knownbits_zext_in_reg: ; X32: # %bb.0: # %BB -; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzbl (%eax), %ecx ; X32-NEXT: imull $101, %ecx, %eax @@ -13,7 +12,6 @@ ; X32-NEXT: imull $177, %ecx, %edx ; X32-NEXT: shrl $14, %edx ; X32-NEXT: movzbl %al, %ecx -; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB0_1: # %CF ; X32-NEXT: # =>This Loop Header: Depth=1 @@ -24,7 +22,8 @@ ; X32-NEXT: .LBB0_2: # %CF237 ; X32-NEXT: # Parent Loop BB0_1 Depth=1 ; X32-NEXT: # => This Inner Loop Header: Depth=2 -; X32-NEXT: testb %bl, %bl +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: testb %al, %al ; X32-NEXT: jne .LBB0_2 ; X32-NEXT: jmp .LBB0_1 ; @@ -36,7 +35,6 @@ ; X64-NEXT: imull $177, %eax, %edx ; X64-NEXT: shrl $14, %edx ; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: xorl %esi, %esi ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB0_1: # %CF ; X64-NEXT: # =>This Loop Header: Depth=1 @@ -47,7 +45,8 @@ ; X64-NEXT: .LBB0_2: # %CF237 ; X64-NEXT: # Parent Loop BB0_1 Depth=1 ; X64-NEXT: # => This Inner Loop Header: Depth=2 -; X64-NEXT: testb %sil, %sil +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %al, %al ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: jmp .LBB0_1 BB: diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -23,52 +23,50 @@ ; CHECK-NEXT: .cfi_offset %edi, -16 ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 -; CHECK-NEXT: xorl %ebx, %ebx +; 
CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: # implicit-def: $ecx ; CHECK-NEXT: # implicit-def: $edi -; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: # kill: killed $al -; CHECK-NEXT: # implicit-def: $dl +; CHECK-NEXT: # implicit-def: $bh +; CHECK-NEXT: # implicit-def: $bl ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_16: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movb %dh, %dl +; CHECK-NEXT: movb %bl, %bh +; CHECK-NEXT: movb %al, %bl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_20 Depth 2 -; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: cmpb $8, %bl ; CHECK-NEXT: ja .LBB0_3 ; CHECK-NEXT: # %bb.2: # %for.cond ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.4: # %if.end ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl a -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %cl, %dh +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: movl $0, h -; CHECK-NEXT: cmpb $8, %dl +; CHECK-NEXT: cmpb $8, %bl ; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: calll printf -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb 
{{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movb %dh, %dl +; CHECK-NEXT: # implicit-def: $esi +; CHECK-NEXT: movb %bl, %bh +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload +; CHECK-NEXT: movb %ah, %al +; CHECK-NEXT: movb %ah, %bl ; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 @@ -76,19 +74,19 @@ ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: # implicit-def: $esi ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: je .LBB0_7 ; CHECK-NEXT: .LBB0_11: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne .LBB0_12 ; CHECK-NEXT: .LBB0_17: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: testl %esi, %esi ; CHECK-NEXT: je .LBB0_19 ; CHECK-NEXT: # %bb.18: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 @@ -96,40 +94,56 @@ ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: .LBB0_19: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $dl -; CHECK-NEXT: # implicit-def: $dh +; CHECK-NEXT: # implicit-def: $bh +; CHECK-NEXT: # implicit-def: $cl ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_20 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_8: # %if.end21 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_9 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je .LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 +; 
CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $esi +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $edi +; CHECK-NEXT: # implicit-def: $bh +; CHECK-NEXT: # implicit-def: $bl +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_11 ; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb %dl, %dh -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movl %ebx, %ecx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_20 ; CHECK-NEXT: # %bb.21: # %for.cond47 ; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: .LBB0_9: # %ae -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: # %bb.22: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %bh, %bl +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: .LBB0_13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: je .LBB0_16 ; CHECK-NEXT: # %bb.14: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 @@ -140,20 +154,6 @@ ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: jmp .LBB0_16 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_17 -; 
CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $edi -; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: # kill: killed $cl -; CHECK-NEXT: # implicit-def: $dl -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -29,8 +29,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 56 -; CHECK-NEXT: subq $552, %rsp ## imm = 0x228 -; CHECK-NEXT: .cfi_def_cfa_offset 608 +; CHECK-NEXT: subq $536, %rsp ## imm = 0x218 +; CHECK-NEXT: .cfi_def_cfa_offset 592 ; CHECK-NEXT: .cfi_offset %rbx, -56 ; CHECK-NEXT: .cfi_offset %r12, -48 ; CHECK-NEXT: .cfi_offset %r13, -40 @@ -48,13 +48,13 @@ ; CHECK-NEXT: ## %bb.2: ## %if.then4 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_55 +; CHECK-NEXT: je LBB0_56 ; CHECK-NEXT: ## %bb.3: ## %SyTime.exit ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_55 +; CHECK-NEXT: je LBB0_56 ; CHECK-NEXT: LBB0_4: ## %cleanup -; CHECK-NEXT: addq $552, %rsp ## imm = 0x228 +; CHECK-NEXT: addq $536, %rsp ## imm = 0x218 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 ; CHECK-NEXT: popq %r13 @@ -65,7 +65,7 @@ ; CHECK-NEXT: LBB0_5: ## %if.end25 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_55 +; CHECK-NEXT: je LBB0_56 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx ; CHECK-NEXT: movq %rdi, %r14 @@ -96,40 +96,40 @@ ; CHECK-NEXT: testb %bpl, %bpl ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: leaq {{.*}}(%rip), %r13 -; CHECK-NEXT: movl $0, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r12d, %r12d -; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: movl %ebp, %ebx ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: decl %r15d ; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r12d, %ebp +; CHECK-NEXT: movl %ebx, %ebp ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 -; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_39 Depth 2 ; CHECK-NEXT: leal -268(%rbp), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 -; CHECK-NEXT: ## %bb.56: ## %while.body200 +; CHECK-NEXT: ## %bb.57: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movslq (%r13,%rax,4), %rax ; CHECK-NEXT: addq %r13, %rax ; CHECK-NEXT: jmpq *%rax -; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader +; CHECK-NEXT: LBB0_45: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movl %ebp, %ebx ; CHECK-NEXT: jne LBB0_21 -; CHECK-NEXT: jmp LBB0_55 +; CHECK-NEXT: jmp LBB0_56 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -138,183 +138,193 @@ ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r12d -; CHECK-NEXT: leaq {{.*}}(%rip), %rcx -; CHECK-NEXT: movslq (%rcx,%rax,4), %rax -; CHECK-NEXT: addq %rcx, %rax +; 
CHECK-NEXT: movl $-1, %ebx +; CHECK-NEXT: movslq (%rdx,%rax,4), %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r12d +; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 -; CHECK-NEXT: jne LBB0_34 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: jne LBB0_35 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 -; CHECK-NEXT: jne LBB0_34 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: jne LBB0_35 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: ## implicit-def: $rax ; CHECK-NEXT: jmp LBB0_29 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: leaq 1(%r14), %rax -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: leaq 1(%r12), %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je LBB0_33 ; CHECK-NEXT: LBB0_29: ## %land.rhs485 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: js LBB0_55 +; CHECK-NEXT: js LBB0_56 ; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movq %rax, %r12 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 ; 
CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: jmp LBB0_34 -; CHECK-NEXT: LBB0_45: ## %sw.bb1134 +; CHECK-NEXT: LBB0_46: ## %sw.bb1134 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx -; CHECK-NEXT: jb LBB0_55 -; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C +; CHECK-NEXT: jb LBB0_56 +; CHECK-NEXT: ## %bb.47: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movl $268, %ebx ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_40: ## %sw.bb566 +; CHECK-NEXT: LBB0_41: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r12d +; CHECK-NEXT: movl $20, %ebx ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r12d +; CHECK-NEXT: movl $2, %ebx ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r14 +; CHECK-NEXT: incq %r12 ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r12), %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: LBB0_35: ## %if.end517 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: leal -324(%rbx), %eax ; CHECK-NEXT: cmpl $59, %eax -; CHECK-NEXT: ja LBB0_35 -; CHECK-NEXT: ## %bb.57: ## %if.end517 +; CHECK-NEXT: ja LBB0_36 +; CHECK-NEXT: ## %bb.58: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movabsq $576460756598390785, %rcx ## 
imm = 0x800000100000001 -; CHECK-NEXT: btq %rax, %rcx -; CHECK-NEXT: jb LBB0_38 -; CHECK-NEXT: LBB0_35: ## %if.end517 +; CHECK-NEXT: movabsq $576460756598390785, %rdx ## imm = 0x800000100000001 +; CHECK-NEXT: btq %rax, %rdx +; CHECK-NEXT: jb LBB0_39 +; CHECK-NEXT: LBB0_36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r12d -; CHECK-NEXT: je LBB0_38 -; CHECK-NEXT: ## %bb.36: ## %if.end517 +; CHECK-NEXT: cmpl $11, %ebx +; CHECK-NEXT: je LBB0_39 +; CHECK-NEXT: ## %bb.37: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r12d -; CHECK-NEXT: je LBB0_38 -; CHECK-NEXT: ## %bb.37: ## %if.then532 +; CHECK-NEXT: cmpl $24, %ebx +; CHECK-NEXT: je LBB0_39 +; CHECK-NEXT: ## %bb.38: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movq _SyFgets.yank@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $0, (%rax) ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_38: ## %for.cond534 +; CHECK-NEXT: LBB0_39: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne LBB0_38 -; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne LBB0_39 +; CHECK-NEXT: ## %bb.40: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movb $0, (%r14) -; CHECK-NEXT: movl %ebp, %r12d -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movb $0, (%r12) +; CHECK-NEXT: movl %ebp, %ebx +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_42: ## %while.cond864 +; CHECK-NEXT: LBB0_43: ## %while.cond864 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_42 +; CHECK-NEXT: jmp LBB0_43 ; CHECK-NEXT: .p2align 
4, 0x90 -; CHECK-NEXT: LBB0_43: ## %while.cond962 +; CHECK-NEXT: LBB0_44: ## %while.cond962 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_43 +; CHECK-NEXT: jmp LBB0_44 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_25: ## %for.cond357 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r12d -; CHECK-NEXT: cmpl $16, %r12d -; CHECK-NEXT: ja LBB0_50 +; CHECK-NEXT: incl %ebx +; CHECK-NEXT: cmpl $16, %ebx +; CHECK-NEXT: ja LBB0_51 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r12d, %eax -; CHECK-NEXT: jae LBB0_50 +; CHECK-NEXT: btl %ebx, %eax +; CHECK-NEXT: jae LBB0_51 ; CHECK-NEXT: ## %bb.24: ; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: LBB0_48: ## %if.then1477 +; CHECK-NEXT: LBB0_49: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write ; CHECK-NEXT: subq %rbx, %r14 ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax ; CHECK-NEXT: leaq 8189(%r14,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_49: ## %for.body1723 +; CHECK-NEXT: LBB0_50: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: decq %rax -; CHECK-NEXT: jmp LBB0_49 -; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit +; CHECK-NEXT: jmp LBB0_50 +; CHECK-NEXT: LBB0_48: ## %if.then1477.loopexit ; CHECK-NEXT: movq %r14, %rbx -; CHECK-NEXT: jmp LBB0_48 +; CHECK-NEXT: jmp LBB0_49 ; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_41 +; CHECK-NEXT: je LBB0_42 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_17: ## %for.body643.us ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_17 ; 
CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_41: ## %while.cond661 +; CHECK-NEXT: LBB0_42: ## %while.cond661 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_41 -; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader +; CHECK-NEXT: jmp LBB0_42 +; CHECK-NEXT: LBB0_51: ## %for.cond1480.preheader ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 ; CHECK-NEXT: cmpq %rax, %rax -; CHECK-NEXT: jae LBB0_55 -; CHECK-NEXT: ## %bb.51: ## %for.body1664.lr.ph +; CHECK-NEXT: jae LBB0_56 +; CHECK-NEXT: ## %bb.52: ## %for.body1664.lr.ph ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB0_54 -; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader -; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: LBB0_53: ## %while.body1679 +; CHECK-NEXT: jne LBB0_55 +; CHECK-NEXT: ## %bb.53: ## %while.body1679.preheader +; CHECK-NEXT: movl %ecx, %ebx +; CHECK-NEXT: incl %ebx +; CHECK-NEXT: LBB0_54: ## %while.body1679 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload ; CHECK-NEXT: movq (%rax), %rdi ; CHECK-NEXT: callq _fileno -; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload -; CHECK-NEXT: leal 1(%rax), %ecx -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movslq %ebx, %rax +; CHECK-NEXT: leal 1(%rax), %ebx ; CHECK-NEXT: cmpq %rax, %rax -; CHECK-NEXT: jl LBB0_53 -; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader +; CHECK-NEXT: jl LBB0_54 +; CHECK-NEXT: LBB0_55: ## %while.cond1683.preheader ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: LBB0_55: ## %if.then.i +; CHECK-NEXT: LBB0_56: ## %if.then.i ; CHECK-NEXT: ud2 entry: %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64