Index: llvm/lib/CodeGen/MachineCSE.cpp =================================================================== --- llvm/lib/CodeGen/MachineCSE.cpp +++ llvm/lib/CodeGen/MachineCSE.cpp @@ -145,7 +145,7 @@ DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); - bool isPRECandidate(MachineInstr *MI); + bool isPRECandidate(MachineInstr *MI, SmallSet &PhysRefs); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); /// Heuristics to see if it's profitable to move common computations of MBB @@ -798,7 +798,8 @@ // We use stronger checks for PRE candidate rather than for CSE ones to embrace // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps // to exclude instrs created by PRE that won't be CSEed later. -bool MachineCSE::isPRECandidate(MachineInstr *MI) { +bool MachineCSE::isPRECandidate(MachineInstr *MI, + SmallSet &PhysRefs) { if (!isCSECandidate(MI) || MI->isNotDuplicable() || MI->mayLoad() || @@ -807,13 +808,14 @@ MI->getNumExplicitDefs() != 1) return false; - for (const auto &def : MI->defs()) - if (!Register::isVirtualRegister(def.getReg())) - return false; - - for (const auto &use : MI->uses()) - if (use.isReg() && !Register::isVirtualRegister(use.getReg())) - return false; + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && !Register::isVirtualRegister(MO.getReg())) { + if (MO.isDef()) + return false; + else + PhysRefs.insert(MO.getReg()); + } + } return true; } @@ -822,7 +824,8 @@ MachineBasicBlock *MBB) { bool Changed = false; for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { - if (!isPRECandidate(&MI)) + SmallSet PhysRefs; + if (!isPRECandidate(&MI, PhysRefs)) continue; if (!PREMap.count(&MI)) { @@ -858,6 +861,15 @@ if (MI.isConvergent() && CMBB != MBB) continue; + // If this instruction uses physical registers then we can only do PRE + // if it's using the value that is live at the place we're hoisting to. + bool NonLocal; + PhysDefVector PhysDefs; + if (!PhysRefs.empty() && + !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs, + PhysDefs, NonLocal)) + continue; + assert(MI.getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); Register VReg = MI.getOperand(0).getReg(); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll @@ -22,8 +22,6 @@ } ; CHECK-LABEL: {{^}}dynamic_shared_array_1: -; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} -; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}} ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]] define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -191,131 +191,131 @@ ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 -; CHECK-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 ; CHECK-NEXT: s_sub_u32 s4, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 ; CHECK-NEXT: s_subb_u32 s5, 0, s3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v2, v2 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s2, v0 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 -; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5 -; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], s1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v9 +; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, s0, v6 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], s1, v4 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v4, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v5 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v4, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v6 +; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_branch .LBB1_3 ; CHECK-NEXT: .LBB1_2: @@ -326,9 +326,8 @@ ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -188,130 +188,130 @@ ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 -; CHECK-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 ; CHECK-NEXT: s_sub_u32 s4, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 ; CHECK-NEXT: s_subb_u32 s5, 0, s3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v2, v2 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s0, v5 -; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v0, vcc -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], s1, v0 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v6 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s1, v1 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s2, v3 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_branch .LBB1_3 ; CHECK-NEXT: .LBB1_2: @@ -322,9 +322,8 @@ ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 Index: llvm/test/CodeGen/AMDGPU/selectcc-opt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/selectcc-opt.ll +++ llvm/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -33,6 +33,7 @@ ; EG-LABEL: {{^}}test_b: ; EG: SET{{[GTEQN]+}}_DX10 +; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) ; EG-NEXT: PRED_ ; EG-NEXT: ALU clause starting define amdgpu_kernel void @test_b(i32 addrspace(1)* %out, float %in) { Index: llvm/test/CodeGen/ARM/machine-cse-cmp.ll =================================================================== --- llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -80,3 +80,41 @@ %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ] ret i8* %retval.0 } + +; The cmp of %val should not be hoisted above the preceding conditional branch +define void @f4(i32** %ptr1, i64* %ptr2, i64 %val) { +entry: +; CHECK-LABEL: f4: +; CHECK: cmp +; CHECK: movne +; CHECK: strne +; CHECK: orrs +; CHECK-NOT: subs +; CHECK-NOT: sbcs +; CHECK: beq + %tobool.not = icmp eq i32** %ptr1, null + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + store i32* null, i32** %ptr1, align 4 + br label %if.end + +if.end: +; CHECK: subs +; CHECK: sbcs +; CHECK: bxlt lr + %tobool1 = icmp ne i64 %val, 0 + %cmp = icmp slt i64 %val, 10 + %or.cond = and i1 %tobool1, %cmp + br i1 %or.cond, label %cleanup, label %if.end3 + +if.end3: +; CHECK: subs +; CHECK: sbc + %sub = add nsw i64 %val, -10 + store i64 %sub, i64* %ptr2, align 8 + br label %cleanup + +cleanup: + ret void +} Index: llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir @@ -0,0 +1,173 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=powerpc-unknown-unknown -run-pass=machine-cse -verify-machineinstrs | FileCheck %s +--- | + define void @can_pre() { + entry: + br label %for.body + + for.body: + br i1 undef, label %if.then, label %if.else + + if.then: + br label %if.end + + if.else: + br label %if.end + + if.end: + br label %for.body + } + + define void @cannot_pre() { + entry: + br label %for.body + + for.body: + br i1 undef, label %if.then, label %if.else + + if.then: + br label %if.end + + if.else: + br label %if.end + + if.end: + br label %for.body + } +... +--- +name: can_pre +registers: + - { id: 0, class: f8rc, preferred-register: '' } + - { id: 1, class: f8rc, preferred-register: '' } + - { id: 2, class: gprc, preferred-register: '' } + - { id: 3, class: gprc, preferred-register: '' } + - { id: 4, class: f8rc, preferred-register: '' } + - { id: 5, class: f8rc, preferred-register: '' } +liveins: + - { reg: '$r1', virtual-reg: '%2' } + - { reg: '$r2', virtual-reg: '%3' } + - { reg: '$f1', virtual-reg: '%4' } + - { reg: '$f2', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: can_pre + ; CHECK: bb.0.for.body: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $r1, $r2, $f1, $f2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:f8rc = COPY $f2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:f8rc = COPY $f1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc = COPY $r2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY $r1 + ; CHECK-NEXT: $cr0 = CMPLWI [[COPY3]], 0 + ; CHECK-NEXT: %6:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: BCC 44, $cr0, %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: BLR implicit $lr, implicit $rm + bb.0.for.body: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r1, $r2, $f1, $f2 + + %5:f8rc = COPY $f2 + %4:f8rc = COPY $f1 + %3:gprc = COPY $r2 + %2:gprc = COPY $r1 + $cr0 = CMPLWI %2, 0 + BCC 44, $cr0, %bb.1 + B %bb.2 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %0:f8rc = nofpexcept FDIV %4, %5, implicit $rm + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %1:f8rc = nofpexcept FDIV %4, %5, implicit $rm + + bb.3.if.end: + BLR implicit $lr, implicit $rm +... +--- +name: cannot_pre +registers: + - { id: 0, class: f8rc, preferred-register: '' } + - { id: 1, class: f8rc, preferred-register: '' } + - { id: 2, class: gprc, preferred-register: '' } + - { id: 3, class: gprc, preferred-register: '' } + - { id: 4, class: f8rc, preferred-register: '' } + - { id: 5, class: f8rc, preferred-register: '' } + - { id: 6, class: f8rc, preferred-register: '' } +liveins: + - { reg: '$r1', virtual-reg: '%2' } + - { reg: '$r2', virtual-reg: '%3' } + - { reg: '$f1', virtual-reg: '%4' } + - { reg: '$f2', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: cannot_pre + ; CHECK: bb.0.for.body: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $r1, $r2, $f1, $f2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:f8rc = COPY $f2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:f8rc = COPY $f1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc = COPY $r2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY $r1 + ; CHECK-NEXT: $cr0 = CMPLWI [[COPY3]], 0 + ; CHECK-NEXT: BCC 44, $cr0, %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SETRND:%[0-9]+]]:f8rc = SETRND [[COPY2]], implicit-def $rm, implicit $rm + ; CHECK-NEXT: %0:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %1:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: BLR implicit $lr, implicit $rm + bb.0.for.body: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r1, $r2, $f1, $f2 + + %5:f8rc = COPY $f2 + %4:f8rc = COPY $f1 + %3:gprc = COPY $r2 + %2:gprc = COPY $r1 + $cr0 = CMPLWI %2, 0 + BCC 44, $cr0, %bb.1 + B %bb.2 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %6:f8rc = SETRND %3, implicit-def $rm, implicit $rm + %0:f8rc = nofpexcept FDIV %4, %5, implicit $rm + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %1:f8rc = nofpexcept FDIV %4, %5, implicit $rm + + bb.3.if.end: + BLR implicit $lr, implicit $rm +... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll @@ -277,23 +277,23 @@ define void @test_memset_preheader(i8* %x, i8* %y, i32 %n) { ; CHECK-LABEL: test_memset_preheader: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cbz r2, .LBB6_5 ; CHECK-NEXT: @ %bb.1: @ %prehead ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: wlstp.8 lr, r2, .LBB6_3 ; CHECK-NEXT: .LBB6_2: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vstrb.8 q0, [r12], #16 +; CHECK-NEXT: vstrb.8 q0, [r4], #16 ; CHECK-NEXT: letp lr, .LBB6_2 ; CHECK-NEXT: .LBB6_3: @ %prehead ; CHECK-NEXT: dls lr, r2 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov r3, r0 ; CHECK-NEXT: .LBB6_4: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r3, [r12], #1 -; CHECK-NEXT: strb r3, [r1], #1 +; CHECK-NEXT: ldrb r4, [r3], #1 +; CHECK-NEXT: strb r4, [r1], #1 ; CHECK-NEXT: le lr, .LBB6_4 ; CHECK-NEXT: .LBB6_5: @ %for.cond.cleanup ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -302,7 +302,7 @@ ; CHECK-NEXT: vstrb.8 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB6_6 ; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: %cmp6 = icmp ne i32 %n, 0 br i1 %cmp6, label %prehead, label %for.cond.cleanup