Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1672,6 +1672,28 @@
     return false;
   }
 
+  if (MI.isInlineAsm()) {
+    // Verify register classes for inlineasm constraints.
+    for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
+         I != E; ++I) {
+      const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
+      if (!RC)
+        continue;
+
+      const MachineOperand &Op = MI.getOperand(I);
+      if (!Op.isReg())
+        continue;
+
+      unsigned Reg = Op.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+        ErrInfo = "inlineasm operand has incorrect register class.";
+        return false;
+      }
+    }
+
+    return true;
+  }
+
   // Make sure the register classes are correct.
   for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
     if (MI.getOperand(i).isFPImm()) {
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -322,19 +322,25 @@
   return isInlineImmediate(N);
 }]>;
 
-class SGPRImm <dag frag> : PatLeaf<frag, [{
+class VGPRImm <dag frag> : PatLeaf<frag, [{
   if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
     return false;
   }
   const SIRegisterInfo *SIRI =
       static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+  unsigned Limit = 0;
   for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
-       U != E; ++U) {
+       Limit < 10 && U != E; ++U, ++Limit) {
     const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
-    if (RC && SIRI->isSGPRClass(RC))
-      return true;
+
+    // If the register class is unknown, it could be an unknown
+    // register class that needs to be an SGPR, e.g. an inline asm
+    // constraint
+    if (!RC || SIRI->isSGPRClass(RC))
+      return false;
   }
-  return false;
+
+  return Limit < 10;
 }]>;
 
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1920,23 +1920,23 @@
 /********** ================== **********/
 
 def : Pat <
-  (SGPRImm<(i32 imm)>:$imm),
-  (S_MOV_B32 imm:$imm)
+  (VGPRImm<(i32 imm)>:$imm),
+  (V_MOV_B32_e32 imm:$imm)
 >;
 
 def : Pat <
-  (SGPRImm<(f32 fpimm)>:$imm),
-  (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
+  (VGPRImm<(f32 fpimm)>:$imm),
+  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : Pat <
   (i32 imm:$imm),
-  (V_MOV_B32_e32 imm:$imm)
+  (S_MOV_B32 imm:$imm)
 >;
 
 def : Pat <
   (f32 fpimm:$imm),
-  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
+  (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : Pat <
Index: test/CodeGen/AMDGPU/inline-constraints.ll
===================================================================
--- test/CodeGen/AMDGPU/inline-constraints.ll
+++ test/CodeGen/AMDGPU/inline-constraints.ll
@@ -26,8 +26,43 @@
 ; GCN: s_mov_b32 m0, -1
 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, m0
 ; GCN: ; use m0
-define void @inline_sreg_constraint_m0(i32 addrspace(1)* %ptr) {
+define void @inline_sreg_constraint_m0() {
   %m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
   tail call void asm sideeffect "; use $0", "s"(i32 %m0)
   ret void
 }
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i32:
+; GCN: s_mov_b32 [[REG:s[0-9]+]], 32
+; GCN: ; use [[REG]]
+define void @inline_sreg_constraint_imm_i32() {
+  tail call void asm sideeffect "; use $0", "s"(i32 32)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f32:
+; GCN: s_mov_b32 [[REG:s[0-9]+]], 1.0
+; GCN: ; use [[REG]]
+define void @inline_sreg_constraint_imm_f32() {
+  tail call void asm sideeffect "; use $0", "s"(float 1.0)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define void @inline_sreg_constraint_imm_i64() {
+  tail call void asm sideeffect "; use $0", "s"(i64 -4)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f64:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0x3ff00000{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define void @inline_sreg_constraint_imm_f64() {
+  tail call void asm sideeffect "; use $0", "s"(double 1.0)
+  ret void
+}
Index: test/CodeGen/AMDGPU/insert_vector_elt.ll
===================================================================
--- test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -15,7 +15,7 @@
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 0x40a00000
+; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
 ; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
 define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {