Index: llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -294,6 +294,52 @@
   return false;
 }
 
+/// Decide whether \p Copy, whose source register is defined by \p MoveImm,
+/// can be rewritten as a scalar move of the same immediate.
+///
+/// \param Copy    The copy being examined.
+/// \param MoveImm The instruction that defines the copy's source register;
+///                may be null, in which case the fold is rejected.
+/// \param TII     Used to look up the src0 operand of \p MoveImm.
+/// \param SMovOp  [out] S_MOV_B32 or S_MOV_B64; written only on success.
+/// \param Imm     [out] The immediate to move; written only on success.
+/// \returns true if \p Copy may be replaced by \p SMovOp of \p Imm.
+static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
+                                    const MachineInstr *MoveImm,
+                                    const SIInstrInfo *TII,
+                                    unsigned &SMovOp,
+                                    int64_t &Imm) {
+  // The register may have no defining instruction; treat that as "no fold".
+  if (!MoveImm || !MoveImm->isMoveImmediate())
+    return false;
+
+  // getNamedOperand yields null when the instruction has no src0 operand.
+  const MachineOperand *ImmOp =
+      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
+  if (!ImmOp || !ImmOp->isImm())
+    return false;
+
+  // FIXME: Handle copies with sub-regs.
+  // A sub-register on either operand means the copy does not move the whole
+  // value MoveImm produced, so forwarding the full immediate would be wrong.
+  if (Copy->getOperand(0).getSubReg() || Copy->getOperand(1).getSubReg())
+    return false;
+
+  // Only these VALU moves have a scalar counterpart handled here.
+  switch (MoveImm->getOpcode()) {
+  default:
+    return false;
+  case AMDGPU::V_MOV_B32_e32:
+    SMovOp = AMDGPU::S_MOV_B32;
+    break;
+  case AMDGPU::V_MOV_B64_PSEUDO:
+    SMovOp = AMDGPU::S_MOV_B64;
+    break;
+  }
+  Imm = ImmOp->getImm();
+  return true;
+}
+
 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
   const SISubtarget &ST = MF.getSubtarget();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -323,7 +369,17 @@
         const TargetRegisterClass *SrcRC, *DstRC;
         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
-          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
+          MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+          unsigned SMovOp;
+          int64_t Imm;
+          // If we are just copying an immediate, we can replace the copy with
+          // s_mov_b32.
+          if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
+            MI.getOperand(1).ChangeToImmediate(Imm);
+            MI.addImplicitDefUseOperands(MF);
+            MI.setDesc(TII->get(SMovOp));
+            break;
+          }
           TII->moveToVALU(MI);
         }
 
Index: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -478,5 +478,32 @@
   br label %bb1
 }
 
+; Immediates feeding a phi should be materialized with scalar moves so the
+; loop keeps using SGPRs (s_xor_b32) instead of being moved to the VALU.
+; GCN-LABEL: {{^}}phi_imm_in_sgprs
+; GCN: s_movk_i32 [[A:s[0-9]+]], 0x400
+; GCN: s_movk_i32 [[B:s[0-9]+]], 0x400
+; GCN: [[LOOP_LABEL:[0-9a-zA-Z_]+]]:
+; GCN: s_xor_b32 [[B]], [[B]], [[A]]
+; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]]
+define void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [0, %entry], [%i.add, %loop]
+  %offset = phi i32 [1024, %entry], [%offset.xor, %loop]
+  %offset.xor = xor i32 %offset, 1024
+  %offset.i = add i32 %offset.xor, %i
+  %ptr = getelementptr i32, i32 addrspace(3)* %out, i32 %offset.i
+  store i32 0, i32 addrspace(3)* %ptr
+  %i.add = add i32 %i, 1
+  %cmp = icmp ult i32 %i.add, %cond
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }