Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -682,7 +682,21 @@ let AllocationPriority = 8; } -def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> { + +// This is not a real register. This is just to have a register to add +// to VReg_1 that does not alias any real register that would +// introduce inferred register classes. +def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> { + let isArtificial = 1; +} + +// FIXME: Should specify an empty set for this. No register should +// ever be allocated using VReg_1. This is a hack for SelectionDAG +// that should always be lowered by SILowerI1Copies. TableGen crashes +// on an empty register set, but also sorts register classes based on +// the number of registers in them. Add only one register so this is +// sorted to the end and not preferred over VGPR_32. +def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> { let Size = 1; } Index: llvm/test/CodeGen/AMDGPU/inline-asm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/inline-asm.ll +++ llvm/test/CodeGen/AMDGPU/inline-asm.ll @@ -198,8 +198,7 @@ } ; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr: -; CHECK: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], -1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, [[MASK]] +; CHECK: v_mov_b32_e32 v0, 1{{$}} ; CHECK: ; use v0 define amdgpu_kernel void @i1_imm_input_phys_vgpr() { entry: @@ -207,14 +206,14 @@ ret void } + +; FIXME: This behavior is nonsense. 
We should probably disallow i1 asm + ; CHECK-LABEL: {{^}}i1_input_phys_vgpr: ; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]] -; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]] -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]] -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK-NOT: [[LOAD]] ; CHECK: ; use v0 -; CHECK: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK: v_cndmask_b32_e64 [[STORE:v[0-9]+]], 0, 1, vcc +; CHECK: v_and_b32_e32 [[STORE:v[0-9]+]], 1, v1 ; CHECK: {{buffer|flat}}_store_byte [[STORE]], define amdgpu_kernel void @i1_input_phys_vgpr() { entry: @@ -224,12 +223,12 @@ ret void } -; FIXME: Should be scheduled to shrink vcc +; FIXME: Should probably be masking high bits of load. ; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2: -; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0 -; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK: v_cmp_eq_u32_e32 vcc, 1, v1 -; CHECK: v_cndmask_b32_e64 v1, 0, -1, vcc +; CHECK: buffer_load_ubyte v0 +; CHECK-NEXT: buffer_load_ubyte v1 +; CHECK-NEXT: s_waitcnt +; CHECK-NEXT: ASMSTART define amdgpu_kernel void @i1_input_phys_vgpr_x2() { entry: %val0 = load volatile i1, i1 addrspace(1)* undef