Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3285,6 +3285,13 @@
   unsigned DstReg = MRI.createVirtualRegister(SRC);
   unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
 
+  if (SubRegs == 1) { // 32-bit source: a single V_READFIRSTLANE_B32.
+    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
+            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
+        .addReg(SrcReg);
+    return DstReg;
+  }
+
   SmallVector SRegs;
   for (unsigned i = 0; i < SubRegs; ++i) {
     unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -3462,6 +3469,14 @@
     return;
   }
 
+  // Legalize SI_INIT_M0: swap a VGPR source for readlaneVGPRToSGPR's result.
+  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
+    MachineOperand &Src = MI.getOperand(0);
+    if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
+      Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
+    return;
+  }
+
   // Legalize MIMG and MUBUF/MTBUF for shaders.
   //
   // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
Index: test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
@@ -121,6 +121,21 @@
   ret void
 }
 
+; TODO: This should use s_mul_i32 instead of v_mul_u32_u24 + v_readfirstlane!
+;
+; GCN-LABEL: {{^}}test_mul24:
+; GCN: v_mul_u32_u24_e32
+; GCN: v_readfirstlane_b32
+; GCN: s_mov_b32 m0,
+; GCN: s_sendmsg sendmsg(MSG_INTERRUPT)
+define amdgpu_gs void @test_mul24(i32 inreg %arg) {
+body:
+  %tmp1 = and i32 %arg, 511
+  %tmp2 = mul nuw nsw i32 %tmp1, 12288
+  call void @llvm.amdgcn.s.sendmsg(i32 1, i32 %tmp2)
+  ret void
+}
+
 declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
 declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
 