# AMDGPU: add the llvm.amdgcn.fract intrinsic and only fold
# (x - floor(x)) into v_fract under UnsafeFPMath.
#
# Per-file summary:
#   IntrinsicsAMDGPU.td      declares int_amdgcn_fract (overloaded on float type).
#   CIInstructions.td        drops the isCI-only (x - floor(x)) -> V_FRACT patterns.
#   SIISelLowering.cpp       lowers Intrinsic::amdgcn_fract to AMDGPUISD::FRACT.
#   SIInstructions.td        selects AMDGPUfract to V_FRACT_F64, moves the
#                            (x - floor(x)) patterns under Predicates = [UnsafeFPMath],
#                            and removes the SI f64 min/cndmask fract pattern.
#   test/CodeGen/AMDGPU      updates fract.ll / fract.f64.ll, adds llvm.amdgcn.fract.ll.
#
# Review notes (changes relative to the submitted patch):
#   * fract.f64.ll: the unsafe-fp-math RUN lines now also enable the GCN prefix so
#     the trailing "GCN: buffer_store_dwordx2 [[FRACT]]" check runs for them too.
#   * fract.ll / llvm.amdgcn.fract.ll: the tonga RUN lines use a VI prefix instead
#     of FUNC / SI, matching the per-subtarget naming of the neighbouring RUN lines.
#   * SIInstructions.td context lines: template arguments that had been stripped
#     (VOP1Inst <...>, RsqPat<...>) are restored -- TODO confirm against the tree.
#
Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -129,6 +129,13 @@
   [llvm_i32_ty], [llvm_anyfloat_ty], [IntrNoMem]
 >;
 
+// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
+// and always uses rtz, so is not suitable for implementing the OpenCL
+// fract function. It should be ok on VI.
+def int_amdgcn_fract : Intrinsic<
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+>;
+
 def int_amdgcn_class : Intrinsic<
   [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
 >;
Index: lib/Target/AMDGPU/CIInstructions.td
===================================================================
--- lib/Target/AMDGPU/CIInstructions.td
+++ lib/Target/AMDGPU/CIInstructions.td
@@ -258,25 +258,6 @@
 
 } // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1
 
-let Predicates = [isCI] in {
-
-// Convert (x - floor(x)) to fract(x)
-def : Pat <
-  (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
-             (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
-  (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-// Convert (x + (-floor(x))) to fract(x)
-def : Pat <
-  (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
-             (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
-  (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-} // End Predicates = [isCI]
-
-
 //===----------------------------------------------------------------------===//
 // Flat Patterns
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1656,6 +1656,10 @@
   case Intrinsic::amdgcn_ldexp:
     return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
                        Op.getOperand(1), Op.getOperand(2));
+
+  case Intrinsic::amdgcn_fract:
+    return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+
   case Intrinsic::amdgcn_class:
     return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
                        Op.getOperand(1), Op.getOperand(2));
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1367,7 +1367,7 @@
 >;
 
 defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
-  VOP_F64_F64
+  VOP_F64_F64, AMDGPUfract
 >;
 } // End SchedRW = [WriteDoubleAdd]
 
@@ -2469,7 +2469,22 @@
 
 def : RsqPat<V_RSQ_F32_e32, f32>;
 def : RsqPat<V_RSQ_F64_e32, f64>;
-}
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+  (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+             (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+  (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+  (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+             (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+  (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [UnsafeFPMath]
 
 //===----------------------------------------------------------------------===//
 // VOP2 Patterns
@@ -3549,21 +3564,6 @@
 // The workaround for the V_FRACT bug is:
 //    fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
 
-// Convert (x + (-floor(x)) to fract(x)
-def : Pat <
-  (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
-             (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
-  (V_CNDMASK_B64_PSEUDO
-      (V_MIN_F64
-          SRCMODS.NONE,
-          (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
-          SRCMODS.NONE,
-          (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
-          DSTCLAMP.NONE, DSTOMOD.NONE),
-      $x,
-      (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
->;
-
 // Convert floor(x) to (x - fract(x))
 def : Pat <
   (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
Index: test/CodeGen/AMDGPU/fract.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fract.f64.ll
+++ test/CodeGen/AMDGPU/fract.f64.ll
@@ -1,20 +1,32 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
 
+; RUN: llc -march=amdgcn -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE -check-prefix=VI-UNSAFE -check-prefix=FUNC %s
+
 declare double @llvm.fabs.f64(double) #0
 declare double @llvm.floor.f64(double) #0
 
 ; FUNC-LABEL: {{^}}fract_f64:
-; GCN-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
 ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
 ; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
 ; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
 ; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
 ; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
-; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
-; CI: buffer_store_dwordx2 [[FRC]]
+; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
+; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
+
+; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
+; CI: v_floor_f64_e32 [[FLOORX:v\[[0-9]+:[0-9]+\]]], [[X]]
+; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[FLOORX]]
+
+; GCN-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
+; GCN-UNSAFE: v_fract_f64_e32 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]]
+
+; GCN: buffer_store_dwordx2 [[FRACT]]
 define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
   %x = load double, double addrspace(1)* %src
   %floor.x = call double @llvm.floor.f64(double %x)
@@ -24,15 +36,24 @@
 }
 
 ; FUNC-LABEL: {{^}}fract_f64_neg:
-; GCN-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
 ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
 ; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
 ; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
 ; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
 ; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
-; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
-; CI: buffer_store_dwordx2 [[FRC]]
+; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
+; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
+
+; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
+; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -[[X]]
+; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -[[X]], -[[FLOORX]]
+
+; GCN-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
+; GCN-UNSAFE: v_fract_f64_e64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -[[X]]
+
+; GCN: buffer_store_dwordx2 [[FRACT]]
 define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
   %x = load double, double addrspace(1)* %src
   %neg.x = fsub double -0.0, %x
@@ -43,15 +64,24 @@
 }
 
 ; FUNC-LABEL: {{^}}fract_f64_neg_abs:
-; GCN-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
+; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
 ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
 ; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
 ; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
 ; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
 ; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
-; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
-; CI: buffer_store_dwordx2 [[FRC]]
+; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
+; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -[[SUB0]]
+
+; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
+; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -|[[X]]|
+; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|[[X]]|, -[[FLOORX]]
+
+; GCN-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
+; GCN-UNSAFE: v_fract_f64_e64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|[[X]]|
+
+; GCN: buffer_store_dwordx2 [[FRACT]]
 define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
   %x = load double, double addrspace(1)* %src
   %abs.x = call double @llvm.fabs.f64(double %x)
@@ -62,5 +92,20 @@
   ret void
 }
 
+; FUNC-LABEL: {{^}}multi_use_floor_fract_f64:
+; VI-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
+; VI-UNSAFE-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[X]]
+; VI-UNSAFE-DAG: v_fract_f64_e32 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]]
+; VI-UNSAFE: buffer_store_dwordx2 [[FLOOR]]
+; VI-UNSAFE: buffer_store_dwordx2 [[FRACT]]
+define void @multi_use_floor_fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
+  %x = load double, double addrspace(1)* %src
+  %floor.x = call double @llvm.floor.f64(double %x)
+  %fract = fsub double %x, %floor.x
+  store volatile double %floor.x, double addrspace(1)* %out
+  store volatile double %fract, double addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
Index: test/CodeGen/AMDGPU/fract.ll
===================================================================
--- test/CodeGen/AMDGPU/fract.ll
+++ test/CodeGen/AMDGPU/fract.ll
@@ -1,18 +1,19 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
 
 declare float @llvm.fabs.f32(float) #0
 declare float @llvm.floor.f32(float) #0
 
-; FUNC-LABEL: {{^}}fract_f32:
-; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
-; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
-; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}fract_f32:
+; GCN-SAFE: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
+; GCN-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
+
+; GCN-UNSAFE: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
 
-; XEG: FRACT
+; GCN: buffer_store_dword [[RESULT]]
 define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
   %x = load float, float addrspace(1)* %src
   %floor.x = call float @llvm.floor.f32(float %x)
@@ -21,13 +22,13 @@
   ret void
 }
 
-; FUNC-LABEL: {{^}}fract_f32_neg:
-; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT:v[0-9]+]]
-; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -[[INPUT:v[0-9]+]]
-; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT]], [[FLR]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}fract_f32_neg:
+; GCN-SAFE: v_floor_f32_e64 [[FLR:v[0-9]+]], -[[INPUT:v[0-9]+]]
+; GCN-SAFE: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT]], [[FLR]]
 
-; XEG: FRACT
+; GCN-UNSAFE: v_fract_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT:v[0-9]+]]
+
+; GCN: buffer_store_dword [[RESULT]]
 define void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
   %x = load float, float addrspace(1)* %src
   %x.neg = fsub float -0.0, %x
@@ -37,13 +38,13 @@
   ret void
 }
 
-; FUNC-LABEL: {{^}}fract_f32_neg_abs:
-; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
-; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
-; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT]]|, [[FLR]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}fract_f32_neg_abs:
+; GCN-SAFE: v_floor_f32_e64 [[FLR:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
+; GCN-SAFE: v_sub_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT]]|, [[FLR]]
+
+; GCN-UNSAFE: v_fract_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
 
-; XEG: FRACT
+; GCN: buffer_store_dword [[RESULT]]
 define void @fract_f32_neg_abs(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
   %x = load float, float addrspace(1)* %src
   %abs.x = call float @llvm.fabs.f32(float %x)
@@ -54,5 +55,20 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}multi_use_floor_fract_f32:
+; GCN-UNSAFE-DAG: v_floor_f32_e32 [[FLOOR:v[0-9]+]], [[INPUT:v[0-9]+]]
+; GCN-UNSAFE-DAG: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[INPUT:v[0-9]+]]
+
+; GCN-UNSAFE: buffer_store_dword [[FLOOR]]
+; GCN-UNSAFE: buffer_store_dword [[FRACT]]
+define void @multi_use_floor_fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
+  %x = load float, float addrspace(1)* %src
+  %floor.x = call float @llvm.floor.f32(float %x)
+  %fract = fsub float %x, %floor.x
+  store volatile float %floor.x, float addrspace(1)* %out
+  store volatile float %fract, float addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+declare float @llvm.amdgcn.fract.f32(float) #0
+declare double @llvm.amdgcn.fract.f64(double) #0
+
+; GCN-LABEL: {{^}}v_fract_f32:
+; GCN: v_fract_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define void @v_fract_f32(float addrspace(1)* %out, float %src) #1 {
+  %fract = call float @llvm.amdgcn.fract.f32(float %src)
+  store float %fract, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fract_f64:
+; GCN: v_fract_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @v_fract_f64(double addrspace(1)* %out, double %src) #1 {
+  %fract = call double @llvm.amdgcn.fract.f64(double %src)
+  store double %fract, double addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }