Index: clang/include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- clang/include/clang/Basic/BuiltinsAMDGPU.def +++ clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -44,6 +44,7 @@ // Instruction builtins. //===----------------------------------------------------------------------===// BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n") +BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n") BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n") BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n") BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n") Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -715,6 +715,12 @@ *out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2); } +// CHECK-LABEL: test_s_setreg( +// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val) +kernel void test_s_setreg(uint val) { + __builtin_amdgcn_s_setreg(8193, val); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl =================================================================== --- clang/test/SemaOpenCL/builtins-amdgcn-error.cl +++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl @@ -139,3 +139,8 @@ const char ptr[] = "workgroup"; __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}} } + +void test_s_setreg(int x, int y) { + __builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}} + __builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}} +} Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
=================================================================== --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1207,6 +1207,16 @@ [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<0>] >; +// Note this can be used to set FP environment properties that are unsafe to +// change in non-strictfp functions. The register properties available +// (and value required to access them) may differ per subtarget. +// llvm.amdgcn.s.setreg(hwmode, value) +def int_amdgcn_s_setreg : + GCCBuiltin<"__builtin_amdgcn_s_setreg">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg<0>] +>; + // int_amdgcn_s_getpc is provided to allow a specific style of position // independent code to determine the high part of its address when it is // known (through convention) that the code and any data of interest does Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -202,13 +202,6 @@ def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>; -def AMDGPUSetRegOp : SDTypeProfile<0, 2, [ - SDTCisInt<0>, SDTCisInt<1> -]>; - -def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [ - SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; - def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [ SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2799,6 +2799,10 @@ constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } + case Intrinsic::amdgcn_s_setreg: { + constrainOpWithReadfirstlane(MI, MRI, 2); + return; + } default: { if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = 
AMDGPU::lookupRsrcIntrinsic(IntrID)) { @@ -3942,6 +3946,13 @@ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); break; } + case Intrinsic::amdgcn_s_setreg: { + // This must be an SGPR, but accept a VGPR. + unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); + break; + } case Intrinsic::amdgcn_end_cf: case Intrinsic::amdgcn_init_exec: { unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7845,32 +7845,32 @@ const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE | (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) | (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_); - const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16); + const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32); const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction()); if (!HasFP32Denormals) { SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue EnableDenorm; + SDNode *EnableDenorm; if (Subtarget->hasDenormModeInst()) { const SDValue EnableDenormValue = getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget); EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, - DAG.getEntryNode(), EnableDenormValue); + DAG.getEntryNode(), EnableDenormValue).getNode(); } else { const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE, SL, MVT::i32); - EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs, - DAG.getEntryNode(), EnableDenormValue, - BitField); + EnableDenorm = + DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs, + {EnableDenormValue, BitField, DAG.getEntryNode()}); } SDValue Ops[3] = { NegDivScale0, - EnableDenorm.getValue(0), - EnableDenorm.getValue(1) + SDValue(EnableDenorm, 0), + 
SDValue(EnableDenorm, 1) }; NegDivScale0 = DAG.getMergeValues(Ops, SL); @@ -7894,25 +7894,25 @@ NumeratorScaled, Fma3); if (!HasFP32Denormals) { - SDValue DisableDenorm; + SDNode *DisableDenorm; if (Subtarget->hasDenormModeInst()) { const SDValue DisableDenormValue = getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget); DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other, Fma4.getValue(1), DisableDenormValue, - Fma4.getValue(2)); + Fma4.getValue(2)).getNode(); } else { const SDValue DisableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32); - DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, - Fma4.getValue(1), DisableDenormValue, - BitField, Fma4.getValue(2)); + DisableDenorm = DAG.getMachineNode( + AMDGPU::S_SETREG_B32, SL, MVT::Other, + {DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)}); } SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, - DisableDenorm, DAG.getRoot()); + SDValue(DisableDenorm, 0), DAG.getRoot()); DAG.setRoot(OutputChain); } Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1131,7 +1131,7 @@ def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>; def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>; -def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; +def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>; def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { Index: llvm/lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SOPInstructions.td +++ llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -807,7 +807,7 @@ "s_setreg_b32", (outs), (ins SReg_32:$sdst, hwreg:$simm16), "$simm16, $sdst", - [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))] + [(int_amdgcn_s_setreg (i32 
timm:$simm16), i32:$sdst)] >; // FIXME: Not on SI? Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s + +; Set FP32 fp_round to round to zero +define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() { +; GCN-LABEL: test_setreg_f32_round_mode_rtz: +; GCN: ; %bb.0: +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3 +; GCN-NEXT: s_endpgm + call void @llvm.amdgcn.s.setreg(i32 4097, i32 3) + ret void +} + +; Set FP64/FP16 fp_round to round to zero +define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() { +; GCN-LABEL: test_setreg_f64_round_mode_rtz: +; GCN: ; %bb.0: +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3 +; GCN-NEXT: s_endpgm + call void @llvm.amdgcn.s.setreg(i32 4225, i32 3) + ret void +} + +; Set all fp_round to round to zero +define amdgpu_kernel void @test_setreg_all_round_mode_rtz() { +; GCN-LABEL: test_setreg_all_round_mode_rtz: +; GCN: ; %bb.0: +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7 +; GCN-NEXT: s_endpgm + call void @llvm.amdgcn.s.setreg(i32 8193, i32 7) + ret void +} + +; Set FP32 fp_round to dynamic mode +define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) { +; GCN-LABEL: test_setreg_roundingmode_var: +; GCN: ; %bb.0: +; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0 +; GCN-NEXT: s_endpgm + call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode) + ret void +} + +define void 
@test_setreg_roundingmode_var_vgpr(i32 %var.mode) { +; GCN-LABEL: test_setreg_roundingmode_var_vgpr: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_readfirstlane_b32 s4, v0 +; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4 +; GCN-NEXT: s_setpc_b64 s[30:31] + call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode) + ret void +} + +declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0 + +attributes #0 = { nounwind }