R600/SI: select v_cvt_rpi_i32_f32 and v_cvt_flr_i32_f32

Summary of the hunks below:
  * AMDGPUInstructions.td: adds FP_HALF, a PatLeaf matching an fpimm that is
    exactly 0.5, plus two PatFrags:
      cvt_rpi_i32_f32 = fp_to_sint (ffloor (fadd $src, FP_HALF))
      cvt_flr_i32_f32 = fp_to_sint (ffloor $src)
  * SIInstructions.td: replaces the commented-out V_CVT_RPI_I32_F32 /
    V_CVT_FLR_I32_F32 definitions (opcodes 0xc / 0xd) with VOP1Inst
    definitions that use those PatFrags.
  * Adds FileCheck tests for both instructions, including negative tests that
    use fptoui and must not select the new instructions.

NOTE(review): the `<vop1<0x..>` opcode arguments of VOP1Inst and the
`<Instruction UMUL24>` template argument on the UMUL24Pattern context line were
missing from the copy this patch was reconstructed from. The 0xc / 0xd values
come from the removed lines; 0xb / 0xf on the context lines should be confirmed
against the tree before applying.

Index: lib/Target/R600/AMDGPUInstructions.td
===================================================================
--- lib/Target/R600/AMDGPUInstructions.td
+++ lib/Target/R600/AMDGPUInstructions.td
@@ -438,6 +438,11 @@
   [{return N->isExactlyValue(1.0);}]
 >;
 
+def FP_HALF : PatLeaf <
+  (fpimm),
+  [{return N->isExactlyValue(0.5);}]
+>;
+
 let isCodeGenOnly = 1, isPseudo = 1 in {
 
 let usesCustomInserter = 1  in {
@@ -603,6 +608,18 @@
 // 24-bit arithmetic patterns
 def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
 
+// Special conversion patterns
+
+def cvt_rpi_i32_f32 : PatFrag <
+  (ops node:$src),
+  (fp_to_sint (ffloor (fadd $src, FP_HALF)))
+>;
+
+def cvt_flr_i32_f32 : PatFrag <
+  (ops node:$src),
+  (fp_to_sint (ffloor $src))
+>;
+
 /*
 class UMUL24Pattern <Instruction UMUL24> : Pat <
   (mul U24:$x, U24:$y),
Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -1230,8 +1230,10 @@
 defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
   VOP_F32_I32, f16_to_fp
 >;
-//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
-//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
+defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
+  VOP_I32_F32, cvt_rpi_i32_f32>;
+defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
+  VOP_I32_F32, cvt_flr_i32_f32>;
 //defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
 defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
   VOP_F32_F64, fround
Index: test/CodeGen/R600/cvt_flr_i32_f32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/cvt_flr_i32_f32.ll
@@ -0,0 +1,79 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.floor.f32(float) #1
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
+; SI-NOT: add
+; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
+  %floor = call float @llvm.floor.f32(float %x) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
+; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
+; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
+  %fadd = fadd float %x, 1.0
+  %floor = call float @llvm.floor.f32(float %fadd) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs:
+; SI-NOT: add
+; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %floor = call float @llvm.floor.f32(float %x.fabs) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg:
+; SI-NOT: add
+; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
+  %x.fneg = fsub float -0.000000e+00, %x
+  %floor = call float @llvm.floor.f32(float %x.fneg) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg:
+; SI-NOT: add
+; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
+  %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}no_cvt_flr_i32_f32_0:
+; SI-NOT: v_cvt_flr_i32_f32
+; SI: v_floor_f32
+; SI: v_cvt_u32_f32_e32
+; SI: s_endpgm
+define void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
+  %floor = call float @llvm.floor.f32(float %x) #1
+  %cvt = fptoui float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/R600/cvt_rpi_i32_f32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/cvt_rpi_i32_f32.ll
@@ -0,0 +1,76 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.floor.f32(float) #1
+
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
+; SI: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
+  %fadd = fadd float %x, 0.5
+  %floor = call float @llvm.floor.f32(float %fadd) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs:
+; SI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %fadd = fadd float %x.fabs, 0.5
+  %floor = call float @llvm.floor.f32(float %fadd) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: This doesn't work because it forms fsub 0
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg:
+; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
+; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}}
+; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
+  %x.fneg = fsub float -0.000000e+00, %x
+  %fadd = fadd float %x.fneg, 0.5
+  %floor = call float @llvm.floor.f32(float %fadd) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: This doesn't work for same reason as above
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg:
+; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
+
+; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}|
+; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
+  %fadd = fadd float %x.fabs.fneg, 0.5
+  %floor = call float @llvm.floor.f32(float %fadd) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}no_cvt_rpi_i32_f32_0:
+; SI-NOT: v_cvt_rpi_i32_f32
+; SI: v_add_f32
+; SI: v_floor_f32
+; SI: v_cvt_u32_f32
+; SI: s_endpgm
+define void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
+  %fadd = fadd float %x, 0.5
+  %floor = call float @llvm.floor.f32(float %fadd) #1
+  %cvt = fptoui float %floor to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }