AMDGPU: add the llvm.amdgcn.fmul.legacy and llvm.amdgcn.rcp.legacy intrinsics,
the matching AMDGPUISD::FMUL_LEGACY / RCP_LEGACY nodes, selection patterns on
V_MUL_LEGACY_F32 / V_RCP_LEGACY_F32, and codegen tests.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy in
which text between angle brackets had been stripped. Indentation and blank
context lines were re-derived from the hunk line counts, and the vop1<...> /
vop2<...> opcode arguments in SIInstructions.td plus the "<unknown>" token in
the rcp.legacy ERROR check were restored by hand. Confirm the context lines
against the target tree before applying (patch -l tolerates whitespace drift).

Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -147,10 +147,18 @@
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
 >;
 
+def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
+  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+>;
+
 def int_amdgcn_rcp : Intrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
 >;
 
+def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
+  Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
+>;
+
 def int_amdgcn_rsq : Intrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
 >;
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -249,7 +249,9 @@
   // For f64, max error 2^29 ULP, handles denormals.
   RCP,
   RSQ,
+  RCP_LEGACY,
   RSQ_LEGACY,
+  FMUL_LEGACY,
   RSQ_CLAMP,
   LDEXP,
   FP_CLASS,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2682,7 +2682,9 @@
   NODE_NAME_CASE(TRIG_PREOP)
   NODE_NAME_CASE(RCP)
   NODE_NAME_CASE(RSQ)
+  NODE_NAME_CASE(RCP_LEGACY)
   NODE_NAME_CASE(RSQ_LEGACY)
+  NODE_NAME_CASE(FMUL_LEGACY)
   NODE_NAME_CASE(RSQ_CLAMP)
   NODE_NAME_CASE(LDEXP)
   NODE_NAME_CASE(FP_CLASS)
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -67,6 +67,7 @@
 def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
 
 // out = 1.0 / sqrt(a)
+def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
 def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
 
 // out = 1.0 / sqrt(a) result clamped to +/- max_float.
@@ -84,6 +85,10 @@
   []
 >;
 
+def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
+  [SDNPCommutative, SDNPAssociative]
+>;
+
 def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
 
 // out = max(a, b) a and b are signed ints
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1997,6 +1997,11 @@
 
     return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
   }
+  case Intrinsic::amdgcn_rcp_legacy: {
+    if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
+      return emitRemovedIntrinsicError(DAG, DL, VT);
+    return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
+  }
   case Intrinsic::amdgcn_rsq_clamp: {
     if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
       return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
@@ -2214,6 +2219,9 @@
     return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
                        Denominator, Numerator);
   }
+  case Intrinsic::amdgcn_fmul_legacy:
+    return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
+                       Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::amdgcn_sffbh:
   case AMDGPUIntrinsic::AMDGPU_flbit_i32: // Legacy name.
     return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1));
@@ -3362,6 +3370,7 @@
   case AMDGPUISD::FRACT:
   case AMDGPUISD::RCP:
   case AMDGPUISD::RSQ:
+  case AMDGPUISD::RCP_LEGACY:
   case AMDGPUISD::RSQ_LEGACY:
   case AMDGPUISD::RSQ_CLAMP:
   case AMDGPUISD::LDEXP: {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1402,7 +1402,8 @@
 defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32",
   VOP_F32_F32, int_amdgcn_log_clamp>;
 defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
-defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
+defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32",
+  VOP_F32_F32, AMDGPUrcp_legacy>;
 defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
   VOP_F32_F32, AMDGPUrsq_clamp
 >;
@@ -1496,7 +1497,7 @@
 let isCommutable = 1 in {
 
 defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
-  VOP_F32_F32_F32
+  VOP_F32_F32_F32, AMDGPUfmul_legacy
 >;
 
 defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
Index: test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+
+; GCN-LABEL: {{^}}test_mul_legacy_f32:
+; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+define void @test_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+  %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32:
+; GCN: v_mul_legacy_f32_e32
+define void @test_mul_legacy_undef0_f32(float addrspace(1)* %out, float %a) #0 {
+  %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32:
+; GCN: v_mul_legacy_f32_e32
+define void @test_mul_legacy_undef1_f32(float addrspace(1)* %out, float %a) #0 {
+  %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
+; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}|
+define void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+  %a.fabs = call float @llvm.fabs.f32(float %a)
+  %b.fabs = call float @llvm.fabs.f32(float %b)
+  %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; TODO: Should match mac_legacy/mad_legacy
+; GCN-LABEL: {{^}}test_mad_legacy_f32:
+; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_add_f32_e32
+define void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+  %add = fadd float %mul, %c
+  store float %add, float addrspace(1)* %out, align 4
+  ret void
+}
+
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.amdgcn.fmul.legacy(float, float) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: not llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+
+; ERROR: error: <unknown>:0:0: in function rcp_legacy_f32 void (float addrspace(1)*, float): intrinsic not supported on subtarget
+
+declare float @llvm.amdgcn.rcp.legacy(float) #0
+
+; GCN-LABEL: {{^}}rcp_legacy_f32:
+; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define void @rcp_legacy_f32(float addrspace(1)* %out, float %src) #1 {
+  %rcp = call float @llvm.amdgcn.rcp.legacy(float %src) #0
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; TODO: Really these should be constant folded
+; GCN-LABEL: {{^}}rcp_legacy_f32_constant_4.0
+; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, 4.0
+define void @rcp_legacy_f32_constant_4.0(float addrspace(1)* %out) #1 {
+  %rcp = call float @llvm.amdgcn.rcp.legacy(float 4.0) #0
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}rcp_legacy_f32_constant_100.0
+; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
+define void @rcp_legacy_f32_constant_100.0(float addrspace(1)* %out) #1 {
+  %rcp = call float @llvm.amdgcn.rcp.legacy(float 100.0) #0
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}rcp_legacy_undef_f32:
+; GCN-NOT: v_rcp_legacy_f32
+define void @rcp_legacy_undef_f32(float addrspace(1)* %out) #1 {
+  %rcp = call float @llvm.amdgcn.rcp.legacy(float undef)
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }