Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -104,6 +104,9 @@
                       const SmallVectorImpl<SDValue> &OutVals,
                       SDLoc DL, SelectionDAG &DAG) const override;
 
+  unsigned getRegisterByName(const char* RegName, EVT VT,
+                             SelectionDAG &DAG) const override;
+
   MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
                                       MachineBasicBlock * BB) const override;
   bool enableAggressiveFMAFusion(EVT VT) const override;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -27,6 +27,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -986,6 +987,59 @@
   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps);
 }
 
+/// Map a register name to the matching AMDGPU physical register.
+///
+/// Recognized names are "m0", "exec", "exec_lo", "exec_hi", "flat_scratch",
+/// "flat_scratch_lo" and "flat_scratch_hi". A fatal error is reported when
+/// the name is not recognized, when the register overlaps FLAT_SCR on a
+/// SOUTHERN_ISLANDS subtarget, or when \p VT is not 32 bits wide for M0 and
+/// the *_LO/*_HI halves or 64 bits wide for EXEC and FLAT_SCR.
+unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
+                                             SelectionDAG &DAG) const {
+  unsigned Reg = StringSwitch<unsigned>(RegName)
+    .Case("m0", AMDGPU::M0)
+    .Case("exec", AMDGPU::EXEC)
+    .Case("exec_lo", AMDGPU::EXEC_LO)
+    .Case("exec_hi", AMDGPU::EXEC_HI)
+    .Case("flat_scratch", AMDGPU::FLAT_SCR)
+    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
+    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
+    .Default(AMDGPU::NoRegister);
+
+  if (Reg == AMDGPU::NoRegister) {
+    report_fatal_error(Twine("invalid register name \""
+                             + StringRef(RegName)  + "\"."));
+
+  }
+
+  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+      Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
+    report_fatal_error(Twine("invalid register \""
+                             + StringRef(RegName)  + "\" for subtarget."));
+  }
+
+  switch (Reg) {
+  case AMDGPU::M0:
+  case AMDGPU::EXEC_LO:
+  case AMDGPU::EXEC_HI:
+  case AMDGPU::FLAT_SCR_LO:
+  case AMDGPU::FLAT_SCR_HI:
+    if (VT.getSizeInBits() == 32)
+      return Reg;
+    break;
+  case AMDGPU::EXEC:
+  case AMDGPU::FLAT_SCR:
+    if (VT.getSizeInBits() == 64)
+      return Reg;
+    break;
+  default:
+    llvm_unreachable("missing register type checking");
+  }
+
+  report_fatal_error(Twine("invalid type for register \""
+                           + StringRef(RegName) + "\"."));
+}
+
 MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     MachineInstr * MI, MachineBasicBlock * BB) const {
 
Index: test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
@@ -0,0 +1,14 @@
+; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: invalid register "flat_scratch_lo" for subtarget.
+
+declare i32 @llvm.read_register.i32(metadata) #0
+
+define void @test_invalid_read_flat_scratch_lo(i32 addrspace(1)* %out) nounwind {
+  store volatile i32 0, i32 addrspace(3)* undef
+  %m0 = call i32 @llvm.read_register.i32(metadata !0)
+  store i32 %m0, i32 addrspace(1)* %out
+  ret void
+}
+
+!0 = !{!"flat_scratch_lo"}
Index: test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
@@ -0,0 +1,14 @@
+; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: invalid type for register "exec".
+
+declare i32 @llvm.read_register.i32(metadata) #0
+
+define void @test_invalid_read_exec(i32 addrspace(1)* %out) nounwind {
+  store volatile i32 0, i32 addrspace(3)* undef
+  %m0 = call i32 @llvm.read_register.i32(metadata !0)
+  store i32 %m0, i32 addrspace(1)* %out
+  ret void
+}
+
+!0 = !{!"exec"}
Index: test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
@@ -0,0 +1,13 @@
+; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: invalid type for register "m0".
+
+declare i64 @llvm.read_register.i64(metadata) #0
+
+define void @test_invalid_read_m0(i64 addrspace(1)* %out) #0 {
+  %exec = call i64 @llvm.read_register.i64(metadata !0)
+  store i64 %exec, i64 addrspace(1)* %out
+  ret void
+}
+
+!0 = !{!"m0"}
Index: test/CodeGen/AMDGPU/read_register.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/read_register.ll
@@ -0,0 +1,81 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.read_register.i32(metadata) #0
+declare i64 @llvm.read_register.i64(metadata) #0
+
+; CHECK-LABEL: {{^}}test_read_m0:
+; CHECK: s_mov_b32 m0, -1
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_m0(i32 addrspace(1)* %out) #0 {
+  store volatile i32 0, i32 addrspace(3)* undef
+  %m0 = call i32 @llvm.read_register.i32(metadata !0)
+  store i32 %m0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_exec:
+; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], exec_lo
+; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], exec_hi
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_read_exec(i64 addrspace(1)* %out) #0 {
+  %exec = call i64 @llvm.read_register.i64(metadata !1)
+  store i64 %exec, i64 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_flat_scratch:
+; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], flat_scratch_lo
+; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], flat_scratch_hi
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_read_flat_scratch(i64 addrspace(1)* %out) #0 {
+  %flat_scratch = call i64 @llvm.read_register.i64(metadata !2)
+  store i64 %flat_scratch, i64 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_flat_scratch_lo:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_lo
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_flat_scratch_lo(i32 addrspace(1)* %out) #0 {
+  %flat_scratch_lo = call i32 @llvm.read_register.i32(metadata !3)
+  store i32 %flat_scratch_lo, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_flat_scratch_hi:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_hi
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_flat_scratch_hi(i32 addrspace(1)* %out) #0 {
+  %flat_scratch_hi = call i32 @llvm.read_register.i32(metadata !4)
+  store i32 %flat_scratch_hi, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_exec_lo:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_lo
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_exec_lo(i32 addrspace(1)* %out) #0 {
+  %exec_lo = call i32 @llvm.read_register.i32(metadata !5)
+  store i32 %exec_lo, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_exec_hi:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_hi
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_exec_hi(i32 addrspace(1)* %out) #0 {
+  %exec_hi = call i32 @llvm.read_register.i32(metadata !6)
+  store i32 %exec_hi, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{!"m0"}
+!1 = !{!"exec"}
+!2 = !{!"flat_scratch"}
+!3 = !{!"flat_scratch_lo"}
+!4 = !{!"flat_scratch_hi"}
+!5 = !{!"exec_lo"}
+!6 = !{!"exec_hi"}
Index: test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
@@ -0,0 +1,22 @@
+; XFAIL: *
+; REQUIRES: asserts
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
+
+; write_register doesn't prevent us from illegally trying to write a
+; vgpr value into a scalar register, but I don't think there's much we
+; can do to avoid this.
+
+declare void @llvm.write_register.i32(metadata, i32) #0
+declare i32 @llvm.r600.read.tidig.x() #0
+
+
+define void @write_vgpr_into_sgpr() {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  call void @llvm.write_register.i32(metadata !0, i32 %tid)
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!0 = !{!"exec_lo"}
Index: test/CodeGen/AMDGPU/write_register.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/write_register.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
+
+declare void @llvm.write_register.i32(metadata, i32) #0
+declare void @llvm.write_register.i64(metadata, i64) #0
+
+; CHECK-LABEL: {{^}}test_write_m0:
+define void @test_write_m0(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !0, i32 0)
+  call void @llvm.write_register.i32(metadata !0, i32 -1)
+  call void @llvm.write_register.i32(metadata !0, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_exec:
+; CHECK: s_mov_b64 exec, 0
+; CHECK: s_mov_b64 exec, -1
+; CHECK: s_mov_b64 exec, s{{\[[0-9]+:[0-9]+\]}}
+define void @test_write_exec(i64 %val) #0 {
+  call void @llvm.write_register.i64(metadata !1, i64 0)
+  call void @llvm.write_register.i64(metadata !1, i64 -1)
+  call void @llvm.write_register.i64(metadata !1, i64 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_flat_scratch:
+; CHECK: s_mov_b64 flat_scratch, 0
+; CHECK: s_mov_b64 flat_scratch, -1
+; CHECK: s_mov_b64 flat_scratch, s{{\[[0-9]+:[0-9]+\]}}
+define void @test_write_flat_scratch(i64 %val) #0 {
+  call void @llvm.write_register.i64(metadata !2, i64 0)
+  call void @llvm.write_register.i64(metadata !2, i64 -1)
+  call void @llvm.write_register.i64(metadata !2, i64 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_flat_scratch_lo:
+; CHECK: s_mov_b32 flat_scratch_lo, 0
+; CHECK: s_mov_b32 flat_scratch_lo, s{{[0-9]+}}
+define void @test_write_flat_scratch_lo(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !3, i32 0)
+  call void @llvm.write_register.i32(metadata !3, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_flat_scratch_hi:
+; CHECK: s_mov_b32 flat_scratch_hi, 0
+; CHECK: s_mov_b32 flat_scratch_hi, s{{[0-9]+}}
+define void @test_write_flat_scratch_hi(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !4, i32 0)
+  call void @llvm.write_register.i32(metadata !4, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_exec_lo:
+; CHECK: s_mov_b32 exec_lo, 0
+; CHECK: s_mov_b32 exec_lo, s{{[0-9]+}}
+define void @test_write_exec_lo(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !5, i32 0)
+  call void @llvm.write_register.i32(metadata !5, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_exec_hi:
+; CHECK: s_mov_b32 exec_hi, 0
+; CHECK: s_mov_b32 exec_hi, s{{[0-9]+}}
+define void @test_write_exec_hi(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !6, i32 0)
+  call void @llvm.write_register.i32(metadata !6, i32 %val)
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{!"m0"}
+!1 = !{!"exec"}
+!2 = !{!"flat_scratch"}
+!3 = !{!"flat_scratch_lo"}
+!4 = !{!"flat_scratch_hi"}
+!5 = !{!"exec_lo"}
+!6 = !{!"exec_hi"}