Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7779,11 +7779,11 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - if (!isTypeLegal(VT)) - return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); - + const TargetRegisterClass *RC = nullptr; if (Constraint.size() == 1) { switch (Constraint[0]) { + default: + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); case 's': case 'r': switch (VT.getSizeInBits()) { @@ -7791,40 +7791,56 @@ return std::make_pair(0U, nullptr); case 32: case 16: - return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass); + RC = &AMDGPU::SReg_32_XM0RegClass; + break; case 64: - return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); + RC = &AMDGPU::SGPR_64RegClass; + break; case 128: - return std::make_pair(0U, &AMDGPU::SReg_128RegClass); + RC = &AMDGPU::SReg_128RegClass; + break; case 256: - return std::make_pair(0U, &AMDGPU::SReg_256RegClass); + RC = &AMDGPU::SReg_256RegClass; + break; case 512: - return std::make_pair(0U, &AMDGPU::SReg_512RegClass); + RC = &AMDGPU::SReg_512RegClass; + break; } - + break; case 'v': switch (VT.getSizeInBits()) { default: return std::make_pair(0U, nullptr); case 32: case 16: - return std::make_pair(0U, &AMDGPU::VGPR_32RegClass); + RC = &AMDGPU::VGPR_32RegClass; + break; case 64: - return std::make_pair(0U, &AMDGPU::VReg_64RegClass); + RC = &AMDGPU::VReg_64RegClass; + break; case 96: - return std::make_pair(0U, &AMDGPU::VReg_96RegClass); + RC = &AMDGPU::VReg_96RegClass; + break; case 128: - return std::make_pair(0U, &AMDGPU::VReg_128RegClass); + RC = &AMDGPU::VReg_128RegClass; + break; case 256: - return std::make_pair(0U, &AMDGPU::VReg_256RegClass); + RC = &AMDGPU::VReg_256RegClass; + break; case 512: - return std::make_pair(0U, &AMDGPU::VReg_512RegClass); + RC = &AMDGPU::VReg_512RegClass; + break; } + break; } + // We actually support i128, i16 and f16 as inline parameters + // even if they are not reported as legal + if (RC && (isTypeLegal(VT) || VT.SimpleTy == MVT::i128 || + VT.SimpleTy == MVT::i16 || VT.SimpleTy == MVT::f16)) + return std::make_pair(0U, RC); } if (Constraint.size() > 1) { - const TargetRegisterClass *RC = nullptr; if (Constraint[1] == 'v') { RC = &AMDGPU::VGPR_32RegClass; } else if (Constraint[1] == 's') { Index: llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll +++ llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll @@ -4,20 +4,28 @@ ; GCN-LABEL: {{^}}inline_reg_constraints: ; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] ; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] ; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] ; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @inline_reg_constraints(i32 addrspace(1)* %ptr) { entry: %v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr) - %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) - %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) - %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr) - %s64 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) - %s128 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %v2_32 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v64 = tail call i64 asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v4_32 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v128 = tail call i128 asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s32_2 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s64 = tail call i64 asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s4_32 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s128 = tail call i128 asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) %s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) ret void } Index: llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll @@ -1,10 +1,10 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s -; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}s_input_output_i16: -; SICI: error: couldn't allocate output register for constraint 's' -; SICI: error: couldn't allocate input reg for constraint 's' +; GCN: s_mov_b32 s[[REG:[0-9]+]], -1 +; GCN: ; use s[[REG]] define amdgpu_kernel void @s_input_output_i16() #0 { %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() tail call void asm sideeffect "; use $0", "s"(i16 %v) #0 @@ -12,8 +12,8 @@ } ; GCN-LABEL: {{^}}v_input_output_i16: -; SICI: error: couldn't allocate output register for constraint 'v' -; SICI: error: couldn't allocate input reg for constraint 'v' +; GCN: v_mov_b32 v[[REG:[0-9]+]], -1 +; GCN: ; use v[[REG]] define amdgpu_kernel void @v_input_output_i16() #0 { %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 tail call void asm sideeffect "; use $0", "v"(i16 %v) @@ -21,8 +21,8 @@ } ; GCN-LABEL: {{^}}s_input_output_f16: -; SICI: error: couldn't allocate output register for constraint 's' -; SICI: error: couldn't allocate input reg for constraint 's' +; GCN: s_mov_b32 s[[REG:[0-9]+]], -1 +; GCN: ; use s[[REG]] define amdgpu_kernel void @s_input_output_f16() #0 { %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0 tail call void asm sideeffect "; use $0", "s"(half %v) @@ -30,8 +30,8 @@ } ; GCN-LABEL: {{^}}v_input_output_f16: -; SICI: error: couldn't allocate output register for constraint 'v' -; SICI: error: couldn't allocate input reg for constraint 'v' +; GCN: v_mov_b32 v[[REG:[0-9]+]], -1 +; GCN: ; use v[[REG]] define amdgpu_kernel void @v_input_output_f16() #0 { %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 tail call void asm sideeffect "; use $0", "v"(half %v) Index: llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll +++ llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll @@ -1,5 +1,6 @@ -; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s +; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s +; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s ; GCN: error: couldn't allocate output register for constraint 's' ; GCN: error: couldn't allocate input reg for constraint 's' @@ -19,32 +20,14 @@ ; GCN: error: couldn't allocate output register for constraint 's' ; GCN: error: couldn't allocate input reg for constraint 's' -define amdgpu_kernel void @s_input_output_i128() { - %v = tail call i128 asm sideeffect "s_mov_b32 $0, -1", "=s"() - tail call void asm sideeffect "; use $0", "s"(i128 %v) - ret void -} - -; GCN: error: couldn't allocate output register for constraint 's' -; GCN: error: couldn't allocate input reg for constraint 's' define amdgpu_kernel void @s_input_output_v8f16() { %v = tail call <8 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"() tail call void asm sideeffect "; use $0", "s"(<8 x half> %v) ret void } -; CI: error: couldn't allocate output register for constraint 's' -; CI: error: couldn't allocate input reg for constraint 's' -; VI-NOT: error -define amdgpu_kernel void @s_input_output_f16() { - %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() - tail call void asm sideeffect "; use $0", "s"(half %v) - ret void -} - -; CI: error: couldn't allocate output register for constraint 's' -; CI: error: couldn't allocate input reg for constraint 's' - +; SICI: error: couldn't allocate output register for constraint 's' +; SICI: error: couldn't allocate input reg for constraint 's' ; VI-NOT: error define amdgpu_kernel void @s_input_output_v2f16() { %v = tail call <2 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"() @@ -52,8 +35,8 @@ ret void } -; CI: error: couldn't allocate output register for constraint 'v' -; CI: error: couldn't allocate input reg for constraint 'v' +; SICI: error: couldn't allocate output register for constraint 'v' +; SICI: error: couldn't allocate input reg for constraint 'v' ; VI-NOT: error define amdgpu_kernel void @v_input_output_v2f16() { %v = tail call <2 x half> asm sideeffect "v_mov_b32 $0, -1", "=v"() @@ -61,20 +44,8 @@ ret void } -; CI: error: couldn't allocate output register for constraint 's' -; CI: error: couldn't allocate input reg for constraint 's' -; VI-NOT: error -define amdgpu_kernel void @s_input_output_i16() { - %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() - tail call void asm sideeffect "; use $0", "s"(i16 %v) - ret void -} - -; FIXME: Should work on all targets? - -; CI: error: couldn't allocate output register for constraint 's' -; CI: error: couldn't allocate input reg for constraint 's' - +; SICI: error: couldn't allocate output register for constraint 's' +; SICI: error: couldn't allocate input reg for constraint 's' ; VI-NOT: error define amdgpu_kernel void @s_input_output_v2i16() { %v = tail call <2 x i16> asm sideeffect "s_mov_b32 $0, -1", "=s"()