Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -110,6 +110,22 @@
   [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
 >;
 
+// Three-operand intrinsics that select to v_med3_f32 / v_med3_u32 / v_med3_i32.
+def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
+  Intrinsic<[llvm_float_ty],
+    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+>;
+
+def int_amdgcn_umed3 : GCCBuiltin<"__builtin_amdgcn_umed3">,
+  Intrinsic<[llvm_i32_ty],
+    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+>;
+
+def int_amdgcn_smed3 : GCCBuiltin<"__builtin_amdgcn_smed3">,
+  Intrinsic<[llvm_i32_ty],
+    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+>;
+
 def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
   Intrinsic<[llvm_float_ty],
     [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1503,6 +1503,16 @@
     return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
   case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
     return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
+  // med3 intrinsics: pass operands 1-3 unchanged to the matching target node.
+  case Intrinsic::amdgcn_fmed3:
+    return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+  case Intrinsic::amdgcn_umed3:
+    return DAG.getNode(AMDGPUISD::UMED3, DL, VT,
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+  case Intrinsic::amdgcn_smed3:
+    return DAG.getNode(AMDGPUISD::SMED3, DL, VT,
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
   default:
     return AMDGPUTargetLowering::LowerOperation(Op, DAG);
   }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Check that llvm.amdgcn.fmed3 is selected to v_med3_f32 under both RUN lines.
+declare float @llvm.amdgcn.fmed3(float, float, float) #0
+
+; GCN-LABEL: {{^}}test_fmed3:
+; GCN: v_med3_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @test_fmed3(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 {
+  %med3 = call float @llvm.amdgcn.fmed3(float %src0, float %src1, float %src2)
+  store float %med3, float addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.smed3.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.smed3.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Check that llvm.amdgcn.smed3 is selected to v_med3_i32 under both RUN lines.
+declare i32 @llvm.amdgcn.smed3(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}test_smed3:
+; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @test_smed3(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
+  %med3 = call i32 @llvm.amdgcn.smed3(i32 %src0, i32 %src1, i32 %src2)
+  store i32 %med3, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.umed3.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.umed3.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Check that llvm.amdgcn.umed3 is selected to v_med3_u32 under both RUN lines.
+declare i32 @llvm.amdgcn.umed3(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}test_umed3:
+; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @test_umed3(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
+  %med3 = call i32 @llvm.amdgcn.umed3(i32 %src0, i32 %src1, i32 %src2)
+  store i32 %med3, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }